Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/a.out.h | 18
-rw-r--r--  include/linux/acpi.h | 55
-rw-r--r--  include/linux/acpi_apmt.h | 19
-rw-r--r--  include/linux/ahci_platform.h | 8
-rw-r--r--  include/linux/amba/bus.h | 1
-rw-r--r--  include/linux/amd-pstate.h | 77
-rw-r--r--  include/linux/arm_ffa.h | 121
-rw-r--r--  include/linux/ata.h | 70
-rw-r--r--  include/linux/avf/virtchnl.h | 14
-rw-r--r--  include/linux/backing-dev.h | 10
-rw-r--r--  include/linux/bcm47xx_nvram.h | 6
-rw-r--r--  include/linux/bcma/bcma_driver_chipcommon.h | 3
-rw-r--r--  include/linux/binfmts.h | 3
-rw-r--r--  include/linux/bio.h | 4
-rw-r--r--  include/linux/bitmap.h | 13
-rw-r--r--  include/linux/bitops.h | 31
-rw-r--r--  include/linux/blk-cgroup.h | 5
-rw-r--r--  include/linux/blk-crypto-profile.h | 12
-rw-r--r--  include/linux/blk-crypto.h | 14
-rw-r--r--  include/linux/blk-mq-pci.h | 4
-rw-r--r--  include/linux/blk-mq-rdma.h | 2
-rw-r--r--  include/linux/blk-mq-virtio.h | 2
-rw-r--r--  include/linux/blk-mq.h | 47
-rw-r--r--  include/linux/blk_types.h | 10
-rw-r--r--  include/linux/blkdev.h | 73
-rw-r--r--  include/linux/bma150.h | 4
-rw-r--r--  include/linux/bpf-cgroup.h | 17
-rw-r--r--  include/linux/bpf.h | 507
-rw-r--r--  include/linux/bpf_local_storage.h | 17
-rw-r--r--  include/linux/bpf_lsm.h | 6
-rw-r--r--  include/linux/bpf_mem_alloc.h | 28
-rw-r--r--  include/linux/bpf_types.h | 2
-rw-r--r--  include/linux/bpf_verifier.h | 96
-rw-r--r--  include/linux/brcmphy.h | 1
-rw-r--r--  include/linux/btf.h | 171
-rw-r--r--  include/linux/btf_ids.h | 4
-rw-r--r--  include/linux/buffer_head.h | 63
-rw-r--r--  include/linux/cache.h | 13
-rw-r--r--  include/linux/can/dev.h | 21
-rw-r--r--  include/linux/can/platform/sja1000.h | 2
-rw-r--r--  include/linux/can/skb.h | 57
-rw-r--r--  include/linux/ceph/messenger.h | 4
-rw-r--r--  include/linux/cfi.h | 59
-rw-r--r--  include/linux/cfi_types.h | 45
-rw-r--r--  include/linux/cgroup-defs.h | 25
-rw-r--r--  include/linux/cgroup.h | 139
-rw-r--r--  include/linux/cgroup_refcnt.h | 96
-rw-r--r--  include/linux/clk-provider.h | 86
-rw-r--r--  include/linux/clk.h | 2
-rw-r--r--  include/linux/clk/at91_pmc.h | 6
-rw-r--r--  include/linux/clk/davinci.h | 8
-rw-r--r--  include/linux/clk/spear.h | 14
-rw-r--r--  include/linux/clkdev.h | 2
-rw-r--r--  include/linux/compat.h | 2
-rw-r--r--  include/linux/compiler-clang.h | 37
-rw-r--r--  include/linux/compiler-gcc.h | 27
-rw-r--r--  include/linux/compiler.h | 23
-rw-r--r--  include/linux/compiler_attributes.h | 10
-rw-r--r--  include/linux/compiler_types.h | 24
-rw-r--r--  include/linux/completion.h | 1
-rw-r--r--  include/linux/configfs.h | 3
-rw-r--r--  include/linux/console.h | 129
-rw-r--r--  include/linux/coredump.h | 2
-rw-r--r--  include/linux/coresight.h | 23
-rw-r--r--  include/linux/counter.h | 186
-rw-r--r--  include/linux/cpufreq.h | 28
-rw-r--r--  include/linux/cpuhotplug.h | 2
-rw-r--r--  include/linux/cpumask.h | 144
-rw-r--r--  include/linux/crypto.h | 5
-rw-r--r--  include/linux/cxl_err.h | 22
-rw-r--r--  include/linux/damon.h | 99
-rw-r--r--  include/linux/dax.h | 2
-rw-r--r--  include/linux/dcache.h | 7
-rw-r--r--  include/linux/debugfs.h | 25
-rw-r--r--  include/linux/delayacct.h | 16
-rw-r--r--  include/linux/devfreq.h | 7
-rw-r--r--  include/linux/device.h | 15
-rw-r--r--  include/linux/device/driver.h | 1
-rw-r--r--  include/linux/dlm.h | 5
-rw-r--r--  include/linux/dma-buf.h | 17
-rw-r--r--  include/linux/dma-iommu.h | 93
-rw-r--r--  include/linux/dma-mapping.h | 5
-rw-r--r--  include/linux/dma-resv.h | 16
-rw-r--r--  include/linux/dma/hsu.h | 6
-rw-r--r--  include/linux/dsa/8021q.h | 31
-rw-r--r--  include/linux/dsa/tag_qca.h | 8
-rw-r--r--  include/linux/dynamic_debug.h | 176
-rw-r--r--  include/linux/edac.h | 30
-rw-r--r--  include/linux/efi.h | 94
-rw-r--r--  include/linux/elfcore.h | 13
-rw-r--r--  include/linux/entry-common.h | 1
-rw-r--r--  include/linux/etherdevice.h | 22
-rw-r--r--  include/linux/ethtool.h | 25
-rw-r--r--  include/linux/eventfd.h | 11
-rw-r--r--  include/linux/evm.h | 49
-rw-r--r--  include/linux/export-internal.h | 6
-rw-r--r--  include/linux/f2fs_fs.h | 40
-rw-r--r--  include/linux/fault-inject.h | 7
-rw-r--r--  include/linux/fb.h | 21
-rw-r--r--  include/linux/filter.h | 28
-rw-r--r--  include/linux/find.h | 310
-rw-r--r--  include/linux/firmware/xlnx-zynqmp.h | 64
-rw-r--r--  include/linux/fixp-arith.h | 1
-rw-r--r--  include/linux/fortify-string.h | 405
-rw-r--r--  include/linux/freezer.h | 245
-rw-r--r--  include/linux/fs.h | 141
-rw-r--r--  include/linux/fs_context.h | 16
-rw-r--r--  include/linux/fs_parser.h | 1
-rw-r--r--  include/linux/fscache.h | 6
-rw-r--r--  include/linux/fscrypt.h | 39
-rw-r--r--  include/linux/fsverity.h | 3
-rw-r--r--  include/linux/ftrace.h | 88
-rw-r--r--  include/linux/gameport.h | 2
-rw-r--r--  include/linux/genl_magic_func.h | 1
-rw-r--r--  include/linux/gfp.h | 44
-rw-r--r--  include/linux/gpio.h | 2
-rw-r--r--  include/linux/gpio/aspeed.h | 4
-rw-r--r--  include/linux/gpio/consumer.h | 13
-rw-r--r--  include/linux/gpio/driver.h | 4
-rw-r--r--  include/linux/gpio/gpio-reg.h | 4
-rw-r--r--  include/linux/gpio/machine.h | 1
-rw-r--r--  include/linux/hdmi.h | 7
-rw-r--r--  include/linux/hid.h | 33
-rw-r--r--  include/linux/highmem.h | 29
-rw-r--r--  include/linux/hisi_acc_qm.h | 77
-rw-r--r--  include/linux/host1x.h | 2
-rw-r--r--  include/linux/hp_sdc.h | 2
-rw-r--r--  include/linux/hpet.h | 2
-rw-r--r--  include/linux/htcpld.h | 2
-rw-r--r--  include/linux/huge_mm.h | 28
-rw-r--r--  include/linux/hugetlb.h | 190
-rw-r--r--  include/linux/hugetlb_cgroup.h | 91
-rw-r--r--  include/linux/hw_breakpoint.h | 4
-rw-r--r--  include/linux/hw_random.h | 5
-rw-r--r--  include/linux/hwmon-sysfs.h | 1
-rw-r--r--  include/linux/hyperv.h | 4
-rw-r--r--  include/linux/i2c.h | 3
-rw-r--r--  include/linux/i3c/device.h | 5
-rw-r--r--  include/linux/ieee80211.h | 106
-rw-r--r--  include/linux/ieee802154.h | 24
-rw-r--r--  include/linux/if_bridge.h | 1
-rw-r--r--  include/linux/if_pppol2tp.h | 2
-rw-r--r--  include/linux/if_pppox.h | 2
-rw-r--r--  include/linux/if_vlan.h | 9
-rw-r--r--  include/linux/igmp.h | 4
-rw-r--r--  include/linux/iio/consumer.h | 28
-rw-r--r--  include/linux/iio/iio-opaque.h | 2
-rw-r--r--  include/linux/iio/iio.h | 8
-rw-r--r--  include/linux/iio/types.h | 3
-rw-r--r--  include/linux/ima.h | 24
-rw-r--r--  include/linux/init.h | 11
-rw-r--r--  include/linux/input/auo-pixcir-ts.h | 44
-rw-r--r--  include/linux/instrumented.h | 59
-rw-r--r--  include/linux/intel-svm.h | 13
-rw-r--r--  include/linux/interconnect-provider.h | 5
-rw-r--r--  include/linux/interval_tree.h | 58
-rw-r--r--  include/linux/io-mapping.h | 4
-rw-r--r--  include/linux/io-pgtable.h | 3
-rw-r--r--  include/linux/io.h | 2
-rw-r--r--  include/linux/io_uring.h | 28
-rw-r--r--  include/linux/io_uring_types.h | 14
-rw-r--r--  include/linux/iomap.h | 47
-rw-r--r--  include/linux/iommu.h | 219
-rw-r--r--  include/linux/iommufd.h | 98
-rw-r--r--  include/linux/ioport.h | 10
-rw-r--r--  include/linux/iosys-map.h | 15
-rw-r--r--  include/linux/iova.h | 2
-rw-r--r--  include/linux/iova_bitmap.h | 26
-rw-r--r--  include/linux/ipc_namespace.h | 5
-rw-r--r--  include/linux/irqchip.h | 4
-rw-r--r--  include/linux/irqdesc.h | 1
-rw-r--r--  include/linux/irqdomain.h | 143
-rw-r--r--  include/linux/irqdomain_defs.h | 31
-rw-r--r--  include/linux/irqreturn.h | 8
-rw-r--r--  include/linux/isa.h | 52
-rw-r--r--  include/linux/iversion.h | 72
-rw-r--r--  include/linux/jbd2.h | 2
-rw-r--r--  include/linux/jump_label.h | 21
-rw-r--r--  include/linux/kallsyms.h | 11
-rw-r--r--  include/linux/kasan.h | 56
-rw-r--r--  include/linux/kcov.h | 2
-rw-r--r--  include/linux/kernfs.h | 3
-rw-r--r--  include/linux/kexec.h | 9
-rw-r--r--  include/linux/key.h | 6
-rw-r--r--  include/linux/khugepaged.h | 19
-rw-r--r--  include/linux/kmsan-checks.h | 83
-rw-r--r--  include/linux/kmsan.h | 330
-rw-r--r--  include/linux/kmsan_string.h | 21
-rw-r--r--  include/linux/kmsan_types.h | 35
-rw-r--r--  include/linux/kprobes.h | 1
-rw-r--r--  include/linux/ksm.h | 3
-rw-r--r--  include/linux/kvm_host.h | 111
-rw-r--r--  include/linux/libata.h | 16
-rw-r--r--  include/linux/libnvdimm.h | 7
-rw-r--r--  include/linux/linear_range.h | 11
-rw-r--r--  include/linux/lru_cache.h | 3
-rw-r--r--  include/linux/lsm_hook_defs.h | 13
-rw-r--r--  include/linux/lsm_hooks.h | 268
-rw-r--r--  include/linux/maple_tree.h | 692
-rw-r--r--  include/linux/math64.h | 26
-rw-r--r--  include/linux/mbcache.h | 9
-rw-r--r--  include/linux/mdev.h | 77
-rw-r--r--  include/linux/mdio.h | 13
-rw-r--r--  include/linux/mdio/mdio-i2c.h | 10
-rw-r--r--  include/linux/mei_aux.h | 12
-rw-r--r--  include/linux/mei_cl_bus.h | 6
-rw-r--r--  include/linux/memcontrol.h | 178
-rw-r--r--  include/linux/memory-tiers.h | 101
-rw-r--r--  include/linux/memory.h | 18
-rw-r--r--  include/linux/memory_hotplug.h | 30
-rw-r--r--  include/linux/mempolicy.h | 13
-rw-r--r--  include/linux/mempool.h | 5
-rw-r--r--  include/linux/memregion.h | 38
-rw-r--r--  include/linux/memremap.h | 6
-rw-r--r--  include/linux/mfd/max8997.h | 3
-rw-r--r--  include/linux/mfd/ocelot.h | 62
-rw-r--r--  include/linux/mfd/rk808.h | 91
-rw-r--r--  include/linux/mfd/tmio.h | 1
-rw-r--r--  include/linux/migrate.h | 30
-rw-r--r--  include/linux/minmax.h | 26
-rw-r--r--  include/linux/mlx5/device.h | 33
-rw-r--r--  include/linux/mlx5/driver.h | 41
-rw-r--r--  include/linux/mlx5/fs.h | 24
-rw-r--r--  include/linux/mlx5/fs_helpers.h | 48
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 418
-rw-r--r--  include/linux/mlx5/mlx5_ifc_fpga.h | 24
-rw-r--r--  include/linux/mlx5/qp.h | 9
-rw-r--r--  include/linux/mlx5/vport.h | 2
-rw-r--r--  include/linux/mm.h | 483
-rw-r--r--  include/linux/mm_inline.h | 242
-rw-r--r--  include/linux/mm_types.h | 318
-rw-r--r--  include/linux/mm_types_task.h | 25
-rw-r--r--  include/linux/mmc/card.h | 1
-rw-r--r--  include/linux/mmc/host.h | 2
-rw-r--r--  include/linux/mmc/mmc.h | 2
-rw-r--r--  include/linux/mmc/sdio_ids.h | 1
-rw-r--r--  include/linux/mmdebug.h | 6
-rw-r--r--  include/linux/mmzone.h | 306
-rw-r--r--  include/linux/mnt_idmapping.h | 108
-rw-r--r--  include/linux/module.h | 20
-rw-r--r--  include/linux/mount.h | 9
-rw-r--r--  include/linux/mroute.h | 6
-rw-r--r--  include/linux/mroute6.h | 4
-rw-r--r--  include/linux/msi.h | 357
-rw-r--r--  include/linux/msi_api.h | 73
-rw-r--r--  include/linux/mtd/mtd.h | 8
-rw-r--r--  include/linux/mtd/nand.h | 1
-rw-r--r--  include/linux/mtd/spi-nor.h | 3
-rw-r--r--  include/linux/mv643xx_eth.h | 2
-rw-r--r--  include/linux/namei.h | 2
-rw-r--r--  include/linux/net.h | 2
-rw-r--r--  include/linux/netdevice.h | 178
-rw-r--r--  include/linux/netfilter.h | 5
-rw-r--r--  include/linux/netfilter/ipset/ip_set.h | 10
-rw-r--r--  include/linux/netfilter_bridge/ebtables.h | 4
-rw-r--r--  include/linux/netfilter_defs.h | 8
-rw-r--r--  include/linux/netfs.h | 8
-rw-r--r--  include/linux/netlink.h | 53
-rw-r--r--  include/linux/nfs4.h | 13
-rw-r--r--  include/linux/nfs_fs.h | 3
-rw-r--r--  include/linux/node.h | 29
-rw-r--r--  include/linux/nodemask.h | 17
-rw-r--r--  include/linux/nsproxy.h | 1
-rw-r--r--  include/linux/nvme.h | 6
-rw-r--r--  include/linux/of.h | 8
-rw-r--r--  include/linux/of_address.h | 11
-rw-r--r--  include/linux/of_device.h | 5
-rw-r--r--  include/linux/of_irq.h | 6
-rw-r--r--  include/linux/of_net.h | 6
-rw-r--r--  include/linux/once.h | 28
-rw-r--r--  include/linux/oom.h | 11
-rw-r--r--  include/linux/overflow.h | 148
-rw-r--r--  include/linux/page-flags-layout.h | 16
-rw-r--r--  include/linux/page-flags.h | 27
-rw-r--r--  include/linux/page_counter.h | 26
-rw-r--r--  include/linux/page_ext.h | 24
-rw-r--r--  include/linux/page_idle.h | 34
-rw-r--r--  include/linux/pageblock-flags.h | 4
-rw-r--r--  include/linux/pagemap.h | 25
-rw-r--r--  include/linux/pagewalk.h | 15
-rw-r--r--  include/linux/panic.h | 1
-rw-r--r--  include/linux/pci.h | 57
-rw-r--r--  include/linux/pci_ids.h | 6
-rw-r--r--  include/linux/pcs-altera-tse.h | 17
-rw-r--r--  include/linux/pe.h | 11
-rw-r--r--  include/linux/percpu-rwsem.h | 6
-rw-r--r--  include/linux/percpu.h | 9
-rw-r--r--  include/linux/percpu_counter.h | 39
-rw-r--r--  include/linux/perf/arm_pmu.h | 12
-rw-r--r--  include/linux/perf/riscv_pmu.h | 2
-rw-r--r--  include/linux/perf_event.h | 221
-rw-r--r--  include/linux/pgtable.h | 72
-rw-r--r--  include/linux/phy.h | 45
-rw-r--r--  include/linux/phy/pcie.h | 12
-rw-r--r--  include/linux/phy/phy-mipi-dphy.h | 3
-rw-r--r--  include/linux/phy/tegra/xusb.h | 4
-rw-r--r--  include/linux/phylink.h | 74
-rw-r--r--  include/linux/pinctrl/consumer.h | 31
-rw-r--r--  include/linux/pinctrl/devinfo.h | 6
-rw-r--r--  include/linux/pinctrl/machine.h | 8
-rw-r--r--  include/linux/pinctrl/pinconf-generic.h | 29
-rw-r--r--  include/linux/pinctrl/pinctrl.h | 20
-rw-r--r--  include/linux/pinctrl/pinmux.h | 5
-rw-r--r--  include/linux/pktcdvd.h | 197
-rw-r--r--  include/linux/platform_data/adp5588.h | 171
-rw-r--r--  include/linux/platform_data/cros_ec_commands.h | 18
-rw-r--r--  include/linux/platform_data/cros_ec_proto.h | 1
-rw-r--r--  include/linux/platform_data/dma-hsu.h | 2
-rw-r--r--  include/linux/platform_data/emc2305.h | 22
-rw-r--r--  include/linux/platform_data/gpmc-omap.h | 8
-rw-r--r--  include/linux/platform_data/gsc_hwmon.h | 5
-rw-r--r--  include/linux/platform_data/pca953x.h | 2
-rw-r--r--  include/linux/platform_data/ssm2518.h | 21
-rw-r--r--  include/linux/platform_data/st33zp24.h | 16
-rw-r--r--  include/linux/platform_data/tps68470.h | 7
-rw-r--r--  include/linux/platform_data/usb-s3c2410_udc.h | 6
-rw-r--r--  include/linux/platform_data/x86/asus-wmi.h | 11
-rw-r--r--  include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h | 76
-rw-r--r--  include/linux/platform_data/x86/pmc_atom.h | 10
-rw-r--r--  include/linux/platform_data/x86/pwm-lpss.h | 33
-rw-r--r--  include/linux/platform_data/x86/simatic-ipc-base.h | 1
-rw-r--r--  include/linux/platform_data/x86/simatic-ipc.h | 2
-rw-r--r--  include/linux/pm.h | 37
-rw-r--r--  include/linux/pm_domain.h | 7
-rw-r--r--  include/linux/pm_runtime.h | 20
-rw-r--r--  include/linux/poison.h | 3
-rw-r--r--  include/linux/posix_acl.h | 41
-rw-r--r--  include/linux/posix_acl_xattr.h | 53
-rw-r--r--  include/linux/power_supply.h | 48
-rw-r--r--  include/linux/prandom.h | 26
-rw-r--r--  include/linux/preempt.h | 42
-rw-r--r--  include/linux/printk.h | 7
-rw-r--r--  include/linux/proc_fs.h | 2
-rw-r--r--  include/linux/property.h | 14
-rw-r--r--  include/linux/pse-pd/pse.h | 129
-rw-r--r--  include/linux/psi.h | 14
-rw-r--r--  include/linux/psi_types.h | 35
-rw-r--r--  include/linux/pstore_ram.h | 99
-rw-r--r--  include/linux/ptp_clock_kernel.h | 60
-rw-r--r--  include/linux/ptrace.h | 9
-rw-r--r--  include/linux/pwm.h | 25
-rw-r--r--  include/linux/pxa2xx_ssp.h | 1
-rw-r--r--  include/linux/raid/pq.h | 8
-rw-r--r--  include/linux/random.h | 114
-rw-r--r--  include/linux/rcupdate.h | 68
-rw-r--r--  include/linux/rcutiny.h | 58
-rw-r--r--  include/linux/rcutree.h | 44
-rw-r--r--  include/linux/reboot.h | 8
-rw-r--r--  include/linux/regmap.h | 60
-rw-r--r--  include/linux/regset.h | 15
-rw-r--r--  include/linux/regulator/consumer.h | 37
-rw-r--r--  include/linux/regulator/driver.h | 3
-rw-r--r--  include/linux/regulator/gpio-regulator.h | 2
-rw-r--r--  include/linux/regulator/mt6331-regulator.h | 46
-rw-r--r--  include/linux/regulator/mt6332-regulator.h | 27
-rw-r--r--  include/linux/regulator/mt6357-regulator.h | 51
-rw-r--r--  include/linux/regulator/userspace-consumer.h | 1
-rw-r--r--  include/linux/remoteproc.h | 22
-rw-r--r--  include/linux/resctrl.h | 70
-rw-r--r--  include/linux/rhashtable.h | 61
-rw-r--r--  include/linux/ring_buffer.h | 4
-rw-r--r--  include/linux/rmap.h | 80
-rw-r--r--  include/linux/rtnetlink.h | 9
-rw-r--r--  include/linux/rwlock.h | 2
-rw-r--r--  include/linux/sbitmap.h | 19
-rw-r--r--  include/linux/sched.h | 79
-rw-r--r--  include/linux/sched/coredump.h | 7
-rw-r--r--  include/linux/sched/signal.h | 1
-rw-r--r--  include/linux/sched/sysctl.h | 5
-rw-r--r--  include/linux/sched/task.h | 3
-rw-r--r--  include/linux/sched/user.h | 2
-rw-r--r--  include/linux/scmi_protocol.h | 4
-rw-r--r--  include/linux/scs.h | 18
-rw-r--r--  include/linux/sctp.h | 5
-rw-r--r--  include/linux/seccomp.h | 1
-rw-r--r--  include/linux/security.h | 63
-rw-r--r--  include/linux/sed-opal.h | 4
-rw-r--r--  include/linux/serdev.h | 1
-rw-r--r--  include/linux/serial_8250.h | 5
-rw-r--r--  include/linux/serial_core.h | 71
-rw-r--r--  include/linux/sfp.h | 194
-rw-r--r--  include/linux/shmem_fs.h | 29
-rw-r--r--  include/linux/shrinker.h | 3
-rw-r--r--  include/linux/skbuff.h | 90
-rw-r--r--  include/linux/skmsg.h | 3
-rw-r--r--  include/linux/slab.h | 274
-rw-r--r--  include/linux/slab_def.h | 3
-rw-r--r--  include/linux/slub_def.h | 8
-rw-r--r--  include/linux/smc911x.h | 14
-rw-r--r--  include/linux/soc/apple/rtkit.h | 12
-rw-r--r--  include/linux/soc/mediatek/mtk-mmsys.h | 9
-rw-r--r--  include/linux/soc/mediatek/mtk-mutex.h | 2
-rw-r--r--  include/linux/soc/mediatek/mtk_sip_svc.h | 3
-rw-r--r--  include/linux/soc/mediatek/mtk_wed.h | 140
-rw-r--r--  include/linux/soc/qcom/llcc-qcom.h | 42
-rw-r--r--  include/linux/soc/qcom/qmi.h | 20
-rw-r--r--  include/linux/soc/qcom/smd-rpm.h | 3
-rw-r--r--  include/linux/soc/sunxi/sunxi_sram.h | 2
-rw-r--r--  include/linux/socket.h | 5
-rw-r--r--  include/linux/sockptr.h | 5
-rw-r--r--  include/linux/soundwire/sdw.h | 9
-rw-r--r--  include/linux/soundwire/sdw_intel.h | 65
-rw-r--r--  include/linux/spi/spi-mem.h | 2
-rw-r--r--  include/linux/spi/spi.h | 62
-rw-r--r--  include/linux/spinlock.h | 2
-rw-r--r--  include/linux/spinlock_api_smp.h | 2
-rw-r--r--  include/linux/spinlock_api_up.h | 2
-rw-r--r--  include/linux/spinlock_rt.h | 2
-rw-r--r--  include/linux/spinlock_up.h | 2
-rw-r--r--  include/linux/srcu.h | 72
-rw-r--r--  include/linux/srcutiny.h | 10
-rw-r--r--  include/linux/srcutree.h | 5
-rw-r--r--  include/linux/stackdepot.h | 8
-rw-r--r--  include/linux/stackprotector.h | 19
-rw-r--r--  include/linux/stat.h | 2
-rw-r--r--  include/linux/stmmac.h | 2
-rw-r--r--  include/linux/string.h | 45
-rw-r--r--  include/linux/string_helpers.h | 7
-rw-r--r--  include/linux/sunrpc/clnt.h | 1
-rw-r--r--  include/linux/sunrpc/sched.h | 13
-rw-r--r--  include/linux/sunrpc/svc.h | 28
-rw-r--r--  include/linux/sunrpc/xdr.h | 2
-rw-r--r--  include/linux/suspend.h | 11
-rw-r--r--  include/linux/swab.h | 25
-rw-r--r--  include/linux/swap.h | 62
-rw-r--r--  include/linux/swap_cgroup.h | 4
-rw-r--r--  include/linux/swapfile.h | 7
-rw-r--r--  include/linux/swapops.h | 215
-rw-r--r--  include/linux/syscalls.h | 2
-rw-r--r--  include/linux/syslog.h | 3
-rw-r--r--  include/linux/tcp.h | 9
-rw-r--r--  include/linux/termios_internal.h | 49
-rw-r--r--  include/linux/thermal.h | 86
-rw-r--r--  include/linux/thunderbolt.h | 2
-rw-r--r--  include/linux/time_namespace.h | 6
-rw-r--r--  include/linux/timer.h | 35
-rw-r--r--  include/linux/timerqueue.h | 2
-rw-r--r--  include/linux/tnum.h | 20
-rw-r--r--  include/linux/trace.h | 40
-rw-r--r--  include/linux/trace_events.h | 3
-rw-r--r--  include/linux/tty.h | 10
-rw-r--r--  include/linux/tty_driver.h | 10
-rw-r--r--  include/linux/tty_ldisc.h | 4
-rw-r--r--  include/linux/u64_stats_sync.h | 145
-rw-r--r--  include/linux/uaccess.h | 19
-rw-r--r--  include/linux/ucb1400.h | 2
-rw-r--r--  include/linux/udp.h | 7
-rw-r--r--  include/linux/uio.h | 11
-rw-r--r--  include/linux/umh.h | 9
-rw-r--r--  include/linux/units.h | 3
-rw-r--r--  include/linux/usb.h | 3
-rw-r--r--  include/linux/usb/chipidea.h | 1
-rw-r--r--  include/linux/usb/hcd.h | 1
-rw-r--r--  include/linux/usb/serial.h | 4
-rw-r--r--  include/linux/usb/tcpci.h | 22
-rw-r--r--  include/linux/usb/typec_dp.h | 5
-rw-r--r--  include/linux/user_events.h | 15
-rw-r--r--  include/linux/user_namespace.h | 35
-rw-r--r--  include/linux/userfaultfd_k.h | 15
-rw-r--r--  include/linux/utsname.h | 1
-rw-r--r--  include/linux/vdpa.h | 1
-rw-r--r--  include/linux/verification.h | 8
-rw-r--r--  include/linux/vfio.h | 97
-rw-r--r--  include/linux/vfio_pci_core.h | 149
-rw-r--r--  include/linux/virtio.h | 6
-rw-r--r--  include/linux/virtio_config.h | 32
-rw-r--r--  include/linux/virtio_net.h | 9
-rw-r--r--  include/linux/virtio_pci_legacy.h | 2
-rw-r--r--  include/linux/vm_event_item.h | 22
-rw-r--r--  include/linux/vmacache.h | 28
-rw-r--r--  include/linux/vmstat.h | 6
-rw-r--r--  include/linux/vt_kern.h | 1
-rw-r--r--  include/linux/wait.h | 44
-rw-r--r--  include/linux/wait_bit.h | 8
-rw-r--r--  include/linux/wireless.h | 10
-rw-r--r--  include/linux/wl12xx.h | 44
-rw-r--r--  include/linux/writeback.h | 8
-rw-r--r--  include/linux/wwan.h | 2
-rw-r--r--  include/linux/xattr.h | 52
-rw-r--r--  include/linux/zsmalloc.h | 2
480 files changed, 12873 insertions, 5347 deletions
diff --git a/include/linux/a.out.h b/include/linux/a.out.h
deleted file mode 100644
index 600cf45645c6..000000000000
--- a/include/linux/a.out.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __A_OUT_GNU_H__
-#define __A_OUT_GNU_H__
-
-#include <uapi/linux/a.out.h>
-
-#ifndef __ASSEMBLY__
-#ifdef linux
-#include <asm/page.h>
-#if defined(__i386__) || defined(__mc68000__)
-#else
-#ifndef SEGMENT_SIZE
-#define SEGMENT_SIZE PAGE_SIZE
-#endif
-#endif
-#endif
-#endif /*__ASSEMBLY__ */
-#endif /* __A_OUT_GNU_H__ */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6f64b2f3dc54..5e6a876e17ba 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -279,14 +279,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { }
void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
+#if defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH)
+void acpi_arch_dma_setup(struct device *dev);
+#else
+static inline void acpi_arch_dma_setup(struct device *dev) { }
+#endif
+
#ifdef CONFIG_ARM64
void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa);
-void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size);
#else
static inline void
acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { }
-static inline void
-acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { }
#endif
int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
@@ -495,7 +498,7 @@ bool acpi_dev_resource_address_space(struct acpi_resource *ares,
struct resource_win *win);
bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares,
struct resource_win *win);
-unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable);
+unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable, u8 wake_capable);
unsigned int acpi_dev_get_irq_type(int triggering, int polarity);
bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
struct resource *res);
@@ -506,6 +509,7 @@ int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list,
void *preproc_data);
int acpi_dev_get_dma_resources(struct acpi_device *adev,
struct list_head *list);
+int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list);
int acpi_dev_filter_resource_type(struct acpi_resource *ares,
unsigned long types);
@@ -582,6 +586,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
#define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000
#define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000
#define OSC_SB_PRM_SUPPORT 0x00200000
+#define OSC_SB_FFH_OPR_SUPPORT 0x00400000
extern bool osc_sb_apei_support_acked;
extern bool osc_pc_lpi_support_confirmed;
@@ -798,6 +803,11 @@ acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *u
return false;
}
+static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer)
+{
+ return -ENODEV;
+}
+
static inline struct acpi_device *
acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv)
{
@@ -977,8 +987,7 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
return DEV_DMA_NOT_SUPPORTED;
}
-static inline int acpi_dma_get_range(struct device *dev, u64 *dma_addr,
- u64 *offset, u64 *size)
+static inline int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map)
{
return -ENODEV;
}
@@ -1075,6 +1084,7 @@ acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state,
struct acpi_s2idle_dev_ops {
struct list_head list_node;
void (*prepare)(void);
+ void (*check)(void);
void (*restore)(void);
};
int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
@@ -1127,6 +1137,7 @@ int acpi_subsys_freeze(struct device *dev);
int acpi_subsys_poweroff(struct device *dev);
void acpi_ec_mark_gpe_for_wake(void);
void acpi_ec_set_gpe_wake_mask(u8 action);
+int acpi_subsys_restore_early(struct device *dev);
#else
static inline int acpi_subsys_prepare(struct device *dev) { return 0; }
static inline void acpi_subsys_complete(struct device *dev) {}
@@ -1135,6 +1146,7 @@ static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; }
static inline int acpi_subsys_suspend(struct device *dev) { return 0; }
static inline int acpi_subsys_freeze(struct device *dev) { return 0; }
static inline int acpi_subsys_poweroff(struct device *dev) { return 0; }
+static inline int acpi_subsys_restore_early(struct device *dev) { return 0; }
static inline void acpi_ec_mark_gpe_for_wake(void) {}
static inline void acpi_ec_set_gpe_wake_mask(u8 action) {}
#endif
@@ -1202,7 +1214,8 @@ bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
struct acpi_resource_gpio **agpio);
bool acpi_gpio_get_io_resource(struct acpi_resource *ares,
struct acpi_resource_gpio **agpio);
-int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index);
+int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index,
+ bool *wake_capable);
#else
static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
struct acpi_resource_gpio **agpio)
@@ -1214,16 +1227,28 @@ static inline bool acpi_gpio_get_io_resource(struct acpi_resource *ares,
{
return false;
}
-static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev,
- const char *name, int index)
+static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name,
+ int index, bool *wake_capable)
{
return -ENXIO;
}
#endif
+static inline int acpi_dev_gpio_irq_wake_get(struct acpi_device *adev, int index,
+ bool *wake_capable)
+{
+ return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, wake_capable);
+}
+
+static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name,
+ int index)
+{
+ return acpi_dev_gpio_irq_wake_get_by(adev, name, index, NULL);
+}
+
static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
{
- return acpi_dev_gpio_irq_get_by(adev, NULL, index);
+ return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, NULL);
}
/* Device properties */
@@ -1466,6 +1491,16 @@ void acpi_init_pcc(void);
static inline void acpi_init_pcc(void) { }
#endif
+#ifdef CONFIG_ACPI_FFH
+void acpi_init_ffh(void);
+extern int acpi_ffh_address_space_arch_setup(void *handler_ctxt,
+ void **region_ctxt);
+extern int acpi_ffh_address_space_arch_handler(acpi_integer *value,
+ void *region_context);
+#else
+static inline void acpi_init_ffh(void) { }
+#endif
+
#ifdef CONFIG_ACPI
extern void acpi_device_notify(struct device *dev);
extern void acpi_device_notify_remove(struct device *dev);
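The acpi.h hunks above fold wake capability into the GPIO IRQ lookup: acpi_dev_gpio_irq_wake_get_by() replaces acpi_dev_gpio_irq_get_by(), and the old entry points become wrappers that pass a NULL wake_capable pointer. A minimal consumer-side sketch of the new helper follows; example_probe(), the resource index, and the error handling are illustrative assumptions, not part of the patch.

/*
 * Hedged sketch: example_probe() and the index value are assumptions made
 * for illustration only.
 */
#include <linux/acpi.h>

static int example_probe(struct acpi_device *adev)
{
	bool wake_capable;
	int irq;

	/* Look up GpioInt 0 and learn whether it may wake the system. */
	irq = acpi_dev_gpio_irq_wake_get(adev, 0, &wake_capable);
	if (irq < 0)
		return irq;

	/*
	 * Callers that do not care about wake capability keep the old names,
	 * e.g. acpi_dev_gpio_irq_get(adev, 0), which now forward a NULL
	 * wake_capable pointer.
	 */
	return 0;
}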
diff --git a/include/linux/acpi_apmt.h b/include/linux/acpi_apmt.h
new file mode 100644
index 000000000000..40bd634d082f
--- /dev/null
+++ b/include/linux/acpi_apmt.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * ARM CoreSight PMU driver.
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
+ *
+ */
+
+#ifndef __ACPI_APMT_H__
+#define __ACPI_APMT_H__
+
+#include <linux/acpi.h>
+
+#ifdef CONFIG_ACPI_APMT
+void acpi_apmt_init(void);
+#else
+static inline void acpi_apmt_init(void) { }
+#endif /* CONFIG_ACPI_APMT */
+
+#endif /* __ACPI_APMT_H__ */
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 49e5383d4222..17fa26215292 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -13,6 +13,7 @@
#include <linux/compiler.h>
+struct clk;
struct device;
struct ata_port_info;
struct ahci_host_priv;
@@ -21,8 +22,12 @@ struct scsi_host_template;
int ahci_platform_enable_phys(struct ahci_host_priv *hpriv);
void ahci_platform_disable_phys(struct ahci_host_priv *hpriv);
+struct clk *ahci_platform_find_clk(struct ahci_host_priv *hpriv,
+ const char *con_id);
int ahci_platform_enable_clks(struct ahci_host_priv *hpriv);
void ahci_platform_disable_clks(struct ahci_host_priv *hpriv);
+int ahci_platform_deassert_rsts(struct ahci_host_priv *hpriv);
+int ahci_platform_assert_rsts(struct ahci_host_priv *hpriv);
int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv);
void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv);
int ahci_platform_enable_resources(struct ahci_host_priv *hpriv);
@@ -41,6 +46,7 @@ int ahci_platform_resume_host(struct device *dev);
int ahci_platform_suspend(struct device *dev);
int ahci_platform_resume(struct device *dev);
-#define AHCI_PLATFORM_GET_RESETS 0x01
+#define AHCI_PLATFORM_GET_RESETS BIT(0)
+#define AHCI_PLATFORM_RST_TRIGGER BIT(1)
#endif /* _AHCI_PLATFORM_H */
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index e94cdf235f1d..5001e14c5c06 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -67,6 +67,7 @@ struct amba_device {
struct clk *pclk;
struct device_dma_parameters dma_parms;
unsigned int periphid;
+ struct mutex periphid_lock;
unsigned int cid;
struct amba_cs_uci_id uci;
unsigned int irq[AMBA_NR_IRQS];
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
new file mode 100644
index 000000000000..1c4b8659f171
--- /dev/null
+++ b/include/linux/amd-pstate.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/include/linux/amd-pstate.h
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Author: Meng Li <li.meng@amd.com>
+ */
+
+#ifndef _LINUX_AMD_PSTATE_H
+#define _LINUX_AMD_PSTATE_H
+
+#include <linux/pm_qos.h>
+
+/*********************************************************************
+ * AMD P-state INTERFACE *
+ *********************************************************************/
+/**
+ * struct amd_aperf_mperf
+ * @aperf: actual performance frequency clock count
+ * @mperf: maximum performance frequency clock count
+ * @tsc: time stamp counter
+ */
+struct amd_aperf_mperf {
+ u64 aperf;
+ u64 mperf;
+ u64 tsc;
+};
+
+/**
+ * struct amd_cpudata - private CPU data for AMD P-State
+ * @cpu: CPU number
+ * @req: constraint request to apply
+ * @cppc_req_cached: cached performance request hints
+ * @highest_perf: the maximum performance an individual processor may reach,
+ * assuming ideal conditions
+ * @nominal_perf: the maximum sustained performance level of the processor,
+ * assuming ideal operating conditions
+ * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
+ * savings are achieved
+ * @lowest_perf: the absolute lowest performance level of the processor
+ * @max_freq: the frequency that maps to highest_perf
+ * @min_freq: the frequency that maps to lowest_perf
+ * @nominal_freq: the frequency that maps to nominal_perf
+ * @lowest_nonlinear_freq: the frequency that maps to lowest_nonlinear_perf
+ * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
+ * @prev: Last Aperf/Mperf/tsc count value read from register
+ * @freq: current cpu frequency value
+ * @boost_supported: whether the processor or SBIOS supports boost mode
+ *
+ * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
+ * represents all the attributes and goals that AMD P-State requests at runtime.
+ */
+struct amd_cpudata {
+ int cpu;
+
+ struct freq_qos_request req[2];
+ u64 cppc_req_cached;
+
+ u32 highest_perf;
+ u32 nominal_perf;
+ u32 lowest_nonlinear_perf;
+ u32 lowest_perf;
+
+ u32 max_freq;
+ u32 min_freq;
+ u32 nominal_freq;
+ u32 lowest_nonlinear_freq;
+
+ struct amd_aperf_mperf cur;
+ struct amd_aperf_mperf prev;
+
+ u64 freq;
+ bool boost_supported;
+};
+
+#endif /* _LINUX_AMD_PSTATE_H */
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index e5c76c1ef9ed..c87aeecaa9b2 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -11,12 +11,96 @@
#include <linux/types.h>
#include <linux/uuid.h>
+#define FFA_SMC(calling_convention, func_num) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, (calling_convention), \
+ ARM_SMCCC_OWNER_STANDARD, (func_num))
+
+#define FFA_SMC_32(func_num) FFA_SMC(ARM_SMCCC_SMC_32, (func_num))
+#define FFA_SMC_64(func_num) FFA_SMC(ARM_SMCCC_SMC_64, (func_num))
+
+#define FFA_ERROR FFA_SMC_32(0x60)
+#define FFA_SUCCESS FFA_SMC_32(0x61)
+#define FFA_INTERRUPT FFA_SMC_32(0x62)
+#define FFA_VERSION FFA_SMC_32(0x63)
+#define FFA_FEATURES FFA_SMC_32(0x64)
+#define FFA_RX_RELEASE FFA_SMC_32(0x65)
+#define FFA_RXTX_MAP FFA_SMC_32(0x66)
+#define FFA_FN64_RXTX_MAP FFA_SMC_64(0x66)
+#define FFA_RXTX_UNMAP FFA_SMC_32(0x67)
+#define FFA_PARTITION_INFO_GET FFA_SMC_32(0x68)
+#define FFA_ID_GET FFA_SMC_32(0x69)
+#define FFA_MSG_POLL FFA_SMC_32(0x6A)
+#define FFA_MSG_WAIT FFA_SMC_32(0x6B)
+#define FFA_YIELD FFA_SMC_32(0x6C)
+#define FFA_RUN FFA_SMC_32(0x6D)
+#define FFA_MSG_SEND FFA_SMC_32(0x6E)
+#define FFA_MSG_SEND_DIRECT_REQ FFA_SMC_32(0x6F)
+#define FFA_FN64_MSG_SEND_DIRECT_REQ FFA_SMC_64(0x6F)
+#define FFA_MSG_SEND_DIRECT_RESP FFA_SMC_32(0x70)
+#define FFA_FN64_MSG_SEND_DIRECT_RESP FFA_SMC_64(0x70)
+#define FFA_MEM_DONATE FFA_SMC_32(0x71)
+#define FFA_FN64_MEM_DONATE FFA_SMC_64(0x71)
+#define FFA_MEM_LEND FFA_SMC_32(0x72)
+#define FFA_FN64_MEM_LEND FFA_SMC_64(0x72)
+#define FFA_MEM_SHARE FFA_SMC_32(0x73)
+#define FFA_FN64_MEM_SHARE FFA_SMC_64(0x73)
+#define FFA_MEM_RETRIEVE_REQ FFA_SMC_32(0x74)
+#define FFA_FN64_MEM_RETRIEVE_REQ FFA_SMC_64(0x74)
+#define FFA_MEM_RETRIEVE_RESP FFA_SMC_32(0x75)
+#define FFA_MEM_RELINQUISH FFA_SMC_32(0x76)
+#define FFA_MEM_RECLAIM FFA_SMC_32(0x77)
+#define FFA_MEM_OP_PAUSE FFA_SMC_32(0x78)
+#define FFA_MEM_OP_RESUME FFA_SMC_32(0x79)
+#define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A)
+#define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B)
+#define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C)
+
+/*
+ * For some calls it is necessary to use SMC64 to pass or return 64-bit values.
+ * For such calls FFA_FN_NATIVE(name) will choose the appropriate
+ * (native-width) function ID.
+ */
+#ifdef CONFIG_64BIT
+#define FFA_FN_NATIVE(name) FFA_FN64_##name
+#else
+#define FFA_FN_NATIVE(name) FFA_##name
+#endif
+
+/* FFA error codes. */
+#define FFA_RET_SUCCESS (0)
+#define FFA_RET_NOT_SUPPORTED (-1)
+#define FFA_RET_INVALID_PARAMETERS (-2)
+#define FFA_RET_NO_MEMORY (-3)
+#define FFA_RET_BUSY (-4)
+#define FFA_RET_INTERRUPTED (-5)
+#define FFA_RET_DENIED (-6)
+#define FFA_RET_RETRY (-7)
+#define FFA_RET_ABORTED (-8)
+
+/* FFA version encoding */
+#define FFA_MAJOR_VERSION_MASK GENMASK(30, 16)
+#define FFA_MINOR_VERSION_MASK GENMASK(15, 0)
+#define FFA_MAJOR_VERSION(x) ((u16)(FIELD_GET(FFA_MAJOR_VERSION_MASK, (x))))
+#define FFA_MINOR_VERSION(x) ((u16)(FIELD_GET(FFA_MINOR_VERSION_MASK, (x))))
+#define FFA_PACK_VERSION_INFO(major, minor) \
+ (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \
+ FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor)))
+#define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0)
+
+/**
+ * The FF-A specification explicitly refers to '4K pages'. This should
+ * not be confused with the kernel PAGE_SIZE, which is the translation
+ * granule the kernel is configured with and may be 4K, 16K or 64K.
+ */
+#define FFA_PAGE_SIZE SZ_4K
+
/* FFA Bus/Device/Driver related */
struct ffa_device {
int vm_id;
bool mode_32bit;
uuid_t uuid;
struct device dev;
+ const struct ffa_ops *ops;
};
#define to_ffa_dev(d) container_of(d, struct ffa_device, dev)
@@ -47,17 +131,18 @@ static inline void *ffa_dev_get_drvdata(struct ffa_device *fdev)
}
#if IS_REACHABLE(CONFIG_ARM_FFA_TRANSPORT)
-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id);
+struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+ const struct ffa_ops *ops);
void ffa_device_unregister(struct ffa_device *ffa_dev);
int ffa_driver_register(struct ffa_driver *driver, struct module *owner,
const char *mod_name);
void ffa_driver_unregister(struct ffa_driver *driver);
bool ffa_device_is_valid(struct ffa_device *ffa_dev);
-const struct ffa_dev_ops *ffa_dev_ops_get(struct ffa_device *dev);
#else
static inline
-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id)
+struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+ const struct ffa_ops *ops)
{
return NULL;
}
@@ -76,11 +161,6 @@ static inline void ffa_driver_unregister(struct ffa_driver *driver) {}
static inline
bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; }
-static inline
-const struct ffa_dev_ops *ffa_dev_ops_get(struct ffa_device *dev)
-{
- return NULL;
-}
#endif /* CONFIG_ARM_FFA_TRANSPORT */
#define ffa_register(driver) \
@@ -109,7 +189,10 @@ struct ffa_partition_info {
#define FFA_PARTITION_DIRECT_SEND BIT(1)
/* partition can send and receive indirect messages. */
#define FFA_PARTITION_INDIRECT_MSG BIT(2)
+/* partition runs in the AArch64 execution state. */
+#define FFA_PARTITION_AARCH64_EXEC BIT(8)
u32 properties;
+ u32 uuid[4];
};
/* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP} which pass data via registers */
@@ -161,11 +244,11 @@ struct ffa_mem_region_attributes {
*/
#define FFA_MEM_RETRIEVE_SELF_BORROWER BIT(0)
u8 flag;
- u32 composite_off;
/*
* Offset in bytes from the start of the outer `ffa_memory_region` to
* an `struct ffa_mem_region_addr_range`.
*/
+ u32 composite_off;
u64 reserved;
};
@@ -257,18 +340,28 @@ struct ffa_mem_ops_args {
struct ffa_mem_region_attributes *attrs;
};
-struct ffa_dev_ops {
+struct ffa_info_ops {
u32 (*api_version_get)(void);
int (*partition_info_get)(const char *uuid_str,
struct ffa_partition_info *buffer);
+};
+
+struct ffa_msg_ops {
void (*mode_32bit_set)(struct ffa_device *dev);
int (*sync_send_receive)(struct ffa_device *dev,
struct ffa_send_direct_data *data);
+};
+
+struct ffa_mem_ops {
int (*memory_reclaim)(u64 g_handle, u32 flags);
- int (*memory_share)(struct ffa_device *dev,
- struct ffa_mem_ops_args *args);
- int (*memory_lend)(struct ffa_device *dev,
- struct ffa_mem_ops_args *args);
+ int (*memory_share)(struct ffa_mem_ops_args *args);
+ int (*memory_lend)(struct ffa_mem_ops_args *args);
+};
+
+struct ffa_ops {
+ const struct ffa_info_ops *info_ops;
+ const struct ffa_msg_ops *msg_ops;
+ const struct ffa_mem_ops *mem_ops;
};
#endif /* _LINUX_ARM_FFA_H */
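The arm_ffa.h rework splits the former ffa_dev_ops into grouped tables (info_ops, msg_ops, mem_ops) reached through the new ffa_device->ops pointer, and adds version packing macros where the major number occupies bits 30:16 and the minor bits 15:0, so FFA_PACK_VERSION_INFO(1, 0) is 0x10000. A minimal sketch of a consumer under these assumptions follows; example_ffa_probe() and the empty direct-message payload are illustrative only.

/*
 * Hedged sketch: example_ffa_probe() and the zeroed payload are assumptions
 * made for illustration only.
 */
#include <linux/arm_ffa.h>

static int example_ffa_probe(struct ffa_device *ffa_dev)
{
	const struct ffa_ops *ops = ffa_dev->ops;
	struct ffa_send_direct_data data = {};
	u32 version = ops->info_ops->api_version_get();

	/* Major is bits 30:16, minor bits 15:0: version 1.0 unpacks from 0x10000. */
	if (FFA_MAJOR_VERSION(version) < 1)
		return -EINVAL;

	return ops->msg_ops->sync_send_receive(ffa_dev, &data);
}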
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 21292b5bbb55..0c18499f60b6 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -566,6 +566,18 @@ struct ata_bmdma_prd {
((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
((id)[ATA_ID_FEATURE_SUPP] & (1 << 2)))
+#define ata_id_has_devslp(id) \
+ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
+ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
+ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)))
+#define ata_id_has_ncq_autosense(id) \
+ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
+ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
+ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)))
+#define ata_id_has_dipm(id) \
+ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
+ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
+ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 3)))
#define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10))
#define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11))
#define ata_id_u32(id,n) \
@@ -578,9 +590,6 @@ struct ata_bmdma_prd {
#define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20)
#define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4))
-#define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))
-#define ata_id_has_ncq_autosense(id) \
- ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))
static inline bool ata_id_has_hipm(const u16 *id)
{
@@ -592,17 +601,6 @@ static inline bool ata_id_has_hipm(const u16 *id)
return val & (1 << 9);
}
-static inline bool ata_id_has_dipm(const u16 *id)
-{
- u16 val = id[ATA_ID_FEATURE_SUPP];
-
- if (val == 0 || val == 0xffff)
- return false;
-
- return val & (1 << 3);
-}
-
-
static inline bool ata_id_has_fua(const u16 *id)
{
if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000)
@@ -617,15 +615,6 @@ static inline bool ata_id_has_flush(const u16 *id)
return id[ATA_ID_COMMAND_SET_2] & (1 << 12);
}
-static inline bool ata_id_flush_enabled(const u16 *id)
-{
- if (ata_id_has_flush(id) == 0)
- return false;
- if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return false;
- return id[ATA_ID_CFS_ENABLE_2] & (1 << 12);
-}
-
static inline bool ata_id_has_flush_ext(const u16 *id)
{
if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
@@ -633,19 +622,6 @@ static inline bool ata_id_has_flush_ext(const u16 *id)
return id[ATA_ID_COMMAND_SET_2] & (1 << 13);
}
-static inline bool ata_id_flush_ext_enabled(const u16 *id)
-{
- if (ata_id_has_flush_ext(id) == 0)
- return false;
- if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return false;
- /*
- * some Maxtor disks have bit 13 defined incorrectly
- * so check bit 10 too
- */
- return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400;
-}
-
static inline u32 ata_id_logical_sector_size(const u16 *id)
{
/* T13/1699-D Revision 6a, Sep 6, 2008. Page 128.
@@ -700,15 +676,6 @@ static inline bool ata_id_has_lba48(const u16 *id)
return id[ATA_ID_COMMAND_SET_2] & (1 << 10);
}
-static inline bool ata_id_lba48_enabled(const u16 *id)
-{
- if (ata_id_has_lba48(id) == 0)
- return false;
- if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return false;
- return id[ATA_ID_CFS_ENABLE_2] & (1 << 10);
-}
-
static inline bool ata_id_hpa_enabled(const u16 *id)
{
/* Yes children, word 83 valid bits cover word 82 data */
@@ -771,16 +738,21 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id)
static inline bool ata_id_has_sense_reporting(const u16 *id)
{
- if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15)))
+ if (!(id[ATA_ID_CFS_ENABLE_2] & BIT(15)))
+ return false;
+ if ((id[ATA_ID_COMMAND_SET_3] & (BIT(15) | BIT(14))) != BIT(14))
return false;
- return id[ATA_ID_COMMAND_SET_3] & (1 << 6);
+ return id[ATA_ID_COMMAND_SET_3] & BIT(6);
}
static inline bool ata_id_sense_reporting_enabled(const u16 *id)
{
- if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15)))
+ if (!ata_id_has_sense_reporting(id))
+ return false;
+ /* ata_id_has_sense_reporting() == true, word 86 must have bit 15 set */
+ if ((id[ATA_ID_COMMAND_SET_4] & (BIT(15) | BIT(14))) != BIT(14))
return false;
- return id[ATA_ID_COMMAND_SET_4] & (1 << 6);
+ return id[ATA_ID_COMMAND_SET_4] & BIT(6);
}
/**
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 2ce27e8e4f19..d91af50ac58d 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,7 +136,8 @@ enum virtchnl_ops {
VIRTCHNL_OP_DISABLE_CHANNELS = 31,
VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
- /* opcode 34 - 44 are reserved */
+ /* opcode 34 - 43 are reserved */
+ VIRTCHNL_OP_GET_SUPPORTED_RXDIDS = 44,
VIRTCHNL_OP_ADD_RSS_CFG = 45,
VIRTCHNL_OP_DEL_RSS_CFG = 46,
VIRTCHNL_OP_ADD_FDIR_FILTER = 47,
@@ -263,6 +264,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM BIT(22)
#define VIRTCHNL_VF_OFFLOAD_ADQ BIT(23)
#define VIRTCHNL_VF_OFFLOAD_USO BIT(25)
+#define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26)
#define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27)
#define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28)
@@ -318,7 +320,9 @@ struct virtchnl_rxq_info {
u16 splithdr_enabled; /* deprecated with AVF 1.0 */
u32 databuffer_size;
u32 max_pkt_size;
- u32 pad1;
+ u8 pad0;
+ u8 rxdid;
+ u8 pad1[2];
u64 dma_ring_addr;
enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */
u32 pad2;
@@ -970,6 +974,10 @@ struct virtchnl_filter {
VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
+struct virtchnl_supported_rxdids {
+ u64 supported_rxdids;
+};
+
/* VIRTCHNL_OP_EVENT
* PF sends this message to inform the VF driver of events that may affect it.
* No direct response is expected from the VF, though it may generate other
@@ -1499,6 +1507,8 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_DEL_CLOUD_FILTER:
valid_len = sizeof(struct virtchnl_filter);
break;
+ case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
+ break;
case VIRTCHNL_OP_ADD_RSS_CFG:
case VIRTCHNL_OP_DEL_RSS_CFG:
valid_len = sizeof(struct virtchnl_rss_cfg);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 439815cc1ab9..fbad4fcd408e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -102,8 +102,18 @@ static inline unsigned long wb_stat_error(void)
#endif
}
+/* BDI ratio is expressed as part per 1000000 for finer granularity. */
+#define BDI_RATIO_SCALE 10000
+
+u64 bdi_get_min_bytes(struct backing_dev_info *bdi);
+u64 bdi_get_max_bytes(struct backing_dev_info *bdi);
int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
+int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio);
+int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio);
+int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes);
+int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes);
+int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
/*
* Flags in backing_dev_info::capability
diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h
index 53b31f69b74a..7615f8d7b1ed 100644
--- a/include/linux/bcm47xx_nvram.h
+++ b/include/linux/bcm47xx_nvram.h
@@ -11,6 +11,7 @@
#include <linux/vmalloc.h>
#ifdef CONFIG_BCM47XX_NVRAM
+int bcm47xx_nvram_init_from_iomem(void __iomem *nvram_start, size_t res_size);
int bcm47xx_nvram_init_from_mem(u32 base, u32 lim);
int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len);
int bcm47xx_nvram_gpio_pin(const char *name);
@@ -20,6 +21,11 @@ static inline void bcm47xx_nvram_release_contents(char *nvram)
vfree(nvram);
};
#else
+static inline int bcm47xx_nvram_init_from_iomem(void __iomem *nvram_start,
+ size_t res_size)
+{
+ return -ENOTSUPP;
+}
static inline int bcm47xx_nvram_init_from_mem(u32 base, u32 lim)
{
return -ENOTSUPP;
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index e3314f746bfa..0cb6638b55e5 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -4,7 +4,7 @@
#include <linux/platform_device.h>
#include <linux/platform_data/brcmnand.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
/** ChipCommon core registers. **/
#define BCMA_CC_ID 0x0000
@@ -271,6 +271,7 @@
#define BCMA_CC_SROM_CONTROL_OP_WRDIS 0x40000000
#define BCMA_CC_SROM_CONTROL_OP_WREN 0x60000000
#define BCMA_CC_SROM_CONTROL_OTPSEL 0x00000010
+#define BCMA_CC_SROM_CONTROL_OTP_PRESENT 0x00000020
#define BCMA_CC_SROM_CONTROL_LOCK 0x00000008
#define BCMA_CC_SROM_CONTROL_SIZE_MASK 0x00000006
#define BCMA_CC_SROM_CONTROL_SIZE_1K 0x00000000
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 3dc20c4f394c..8d51f69f9f5e 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -43,9 +43,6 @@ struct linux_binprm {
* original userspace.
*/
point_of_no_return:1;
-#ifdef __alpha__
- unsigned int taso:1;
-#endif
struct file *executable; /* Executable to pass to the interpreter */
struct file *interpreter;
struct file *file;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ca22b06700a9..b231a665682a 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -475,8 +475,6 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
-extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
- struct bio *src, struct bvec_iter *src_iter);
extern void bio_copy_data(struct bio *dst, struct bio *src);
extern void bio_free_pages(struct bio *bio);
void guard_bio_eod(struct bio *bio);
@@ -509,7 +507,7 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
{
bio_clear_flag(bio, BIO_REMAPPED);
if (bio->bi_bdev != bdev)
- bio_clear_flag(bio, BIO_THROTTLED);
+ bio_clear_flag(bio, BIO_BPS_THROTTLED);
bio->bi_bdev = bdev;
bio_associate_blkg(bio);
}
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index f65410a49fda..7d6d73b78147 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -51,6 +51,7 @@ struct device;
* bitmap_empty(src, nbits) Are all bits zero in *src?
* bitmap_full(src, nbits) Are all bits set in *src?
* bitmap_weight(src, nbits) Hamming Weight: number set bits
+ * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap
* bitmap_set(dst, pos, nbits) Set specified bit area
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
@@ -164,6 +165,8 @@ bool __bitmap_intersects(const unsigned long *bitmap1,
bool __bitmap_subset(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
+unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
void __bitmap_set(unsigned long *map, unsigned int start, int len);
void __bitmap_clear(unsigned long *map, unsigned int start, int len);
@@ -222,7 +225,6 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n
#else
#define bitmap_copy_le bitmap_copy
#endif
-unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits);
int bitmap_print_to_pagebuf(bool list, char *buf,
const unsigned long *maskp, int nmaskbits);
@@ -439,6 +441,15 @@ unsigned int bitmap_weight(const unsigned long *src, unsigned int nbits)
return __bitmap_weight(src, nbits);
}
+static __always_inline
+unsigned long bitmap_weight_and(const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
+{
+ if (small_const_nbits(nbits))
+ return hweight_long(*src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits));
+ return __bitmap_weight_and(src1, src2, nbits);
+}
+
static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
unsigned int nbits)
{
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cf9bf65039f2..2ba557e067fe 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w);
#define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr)
#define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr)
#define test_bit(nr, addr) bitop(_test_bit, nr, addr)
+#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr)
/*
* Include this here because some architectures need generic_ffs/fls in
@@ -247,6 +248,25 @@ static inline unsigned long __ffs64(u64 word)
}
/**
+ * fns - find N'th set bit in a word
+ * @word: The word to search
+ * @n: Bit to find
+ */
+static inline unsigned long fns(unsigned long word, unsigned int n)
+{
+ unsigned int bit;
+
+ while (word) {
+ bit = __ffs(word);
+ if (n-- == 0)
+ return bit;
+ __clear_bit(bit, &word);
+ }
+
+ return BITS_PER_LONG;
+}
+
+/**
* assign_bit - Assign value to a bit in memory
* @nr: the bit to set
* @addr: the address to start counting from
@@ -327,10 +347,10 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \
typeof(*(ptr)) old__, new__; \
\
+ old__ = READ_ONCE(*(ptr)); \
do { \
- old__ = READ_ONCE(*(ptr)); \
new__ = (old__ & ~mask__) | bits__; \
- } while (cmpxchg(ptr, old__, new__) != old__); \
+ } while (!try_cmpxchg(ptr, &old__, new__)); \
\
old__; \
})
@@ -342,11 +362,12 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
const typeof(*(ptr)) clear__ = (clear), test__ = (test);\
typeof(*(ptr)) old__, new__; \
\
+ old__ = READ_ONCE(*(ptr)); \
do { \
- old__ = READ_ONCE(*(ptr)); \
+ if (old__ & test__) \
+ break; \
new__ = old__ & ~clear__; \
- } while (!(old__ & test__) && \
- cmpxchg(ptr, old__, new__) != old__); \
+ } while (!try_cmpxchg(ptr, &old__, new__)); \
\
!(old__ & test__); \
})
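The new fns() walks the word with __ffs() and __clear_bit(), returning the position of the n'th (0-based) set bit, or BITS_PER_LONG when the word has fewer than n+1 bits set. A minimal sketch with an illustrative word value:

/*
 * Hedged sketch: the word value is illustrative only.
 */
#include <linux/bitops.h>

static unsigned long example_fns(void)
{
	unsigned long word = 0x16;	/* bits 1, 2 and 4 set */

	/*
	 * fns(word, 0) == 1, fns(word, 1) == 2, fns(word, 2) == 4, and
	 * fns(word, 3) == BITS_PER_LONG because only three bits are set.
	 */
	return fns(word, 2);
}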
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 9f40dbc65f82..dd5841a42c33 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -18,14 +18,14 @@
struct bio;
struct cgroup_subsys_state;
-struct request_queue;
+struct gendisk;
#define FC_APPID_LEN 129
#ifdef CONFIG_BLK_CGROUP
extern struct cgroup_subsys_state * const blkcg_root_css;
-void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
bool blk_cgroup_congested(void);
void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css);
@@ -39,7 +39,6 @@ struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio);
static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }
-static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
{
return NULL;
diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h
index bbab65bd5428..e6802b69cdd6 100644
--- a/include/linux/blk-crypto-profile.h
+++ b/include/linux/blk-crypto-profile.h
@@ -138,18 +138,6 @@ int devm_blk_crypto_profile_init(struct device *dev,
unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot);
-blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
- const struct blk_crypto_key *key,
- struct blk_crypto_keyslot **slot_ptr);
-
-void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot);
-
-bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
- const struct blk_crypto_config *cfg);
-
-int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
- const struct blk_crypto_key *key);
-
void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile);
void blk_crypto_profile_destroy(struct blk_crypto_profile *profile);
diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h
index 69b24fe92cbf..1e3e5d0adf12 100644
--- a/include/linux/blk-crypto.h
+++ b/include/linux/blk-crypto.h
@@ -13,6 +13,7 @@ enum blk_crypto_mode_num {
BLK_ENCRYPTION_MODE_AES_256_XTS,
BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV,
BLK_ENCRYPTION_MODE_ADIANTUM,
+ BLK_ENCRYPTION_MODE_SM4_XTS,
BLK_ENCRYPTION_MODE_MAX,
};
@@ -71,9 +72,6 @@ struct bio_crypt_ctx {
#include <linux/blk_types.h>
#include <linux/blkdev.h>
-struct request;
-struct request_queue;
-
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
static inline bool bio_has_crypt_ctx(struct bio *bio)
@@ -94,13 +92,15 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
unsigned int dun_bytes,
unsigned int data_unit_size);
-int blk_crypto_start_using_key(const struct blk_crypto_key *key,
- struct request_queue *q);
+int blk_crypto_start_using_key(struct block_device *bdev,
+ const struct blk_crypto_key *key);
-int blk_crypto_evict_key(struct request_queue *q,
+int blk_crypto_evict_key(struct block_device *bdev,
const struct blk_crypto_key *key);
-bool blk_crypto_config_supported(struct request_queue *q,
+bool blk_crypto_config_supported_natively(struct block_device *bdev,
+ const struct blk_crypto_config *cfg);
+bool blk_crypto_config_supported(struct block_device *bdev,
const struct blk_crypto_config *cfg);
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h
index 0b1f45c62623..ca544e1d3508 100644
--- a/include/linux/blk-mq-pci.h
+++ b/include/linux/blk-mq-pci.h
@@ -5,7 +5,7 @@
struct blk_mq_queue_map;
struct pci_dev;
-int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev,
- int offset);
+void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev,
+ int offset);
#endif /* _LINUX_BLK_MQ_PCI_H */
diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h
index 5cc5f0f36218..53b58c610e76 100644
--- a/include/linux/blk-mq-rdma.h
+++ b/include/linux/blk-mq-rdma.h
@@ -5,7 +5,7 @@
struct blk_mq_tag_set;
struct ib_device;
-int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map,
+void blk_mq_rdma_map_queues(struct blk_mq_queue_map *map,
struct ib_device *dev, int first_vec);
#endif /* _LINUX_BLK_MQ_RDMA_H */
diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h
index 687ae287e1dc..13226e9b22dd 100644
--- a/include/linux/blk-mq-virtio.h
+++ b/include/linux/blk-mq-virtio.h
@@ -5,7 +5,7 @@
struct blk_mq_queue_map;
struct virtio_device;
-int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap,
+void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap,
struct virtio_device *vdev, int first_vec);
#endif /* _LINUX_BLK_MQ_VIRTIO_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index effee1dc715a..779fba613bd0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -7,6 +7,7 @@
#include <linux/lockdep.h>
#include <linux/scatterlist.h>
#include <linux/prefetch.h>
+#include <linux/srcu.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -14,7 +15,12 @@ struct blk_flush_queue;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_DEFAULT_RQ 128
-typedef void (rq_end_io_fn)(struct request *, blk_status_t);
+enum rq_end_io_ret {
+ RQ_END_IO_NONE,
+ RQ_END_IO_FREE,
+};
+
+typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t);
/*
* request flags */
@@ -135,7 +141,6 @@ struct request {
struct blk_crypto_keyslot *crypt_keyslot;
#endif
- unsigned short write_hint;
unsigned short ioprio;
enum mq_rq_state state;
@@ -268,9 +273,16 @@ static inline void rq_list_move(struct request **src, struct request **dst,
rq_list_add(dst, rq);
}
+/**
+ * enum blk_eh_timer_return - How the timeout handler should proceed
+ * @BLK_EH_DONE: The block driver completed the command or will complete it at
+ * a later time.
+ * @BLK_EH_RESET_TIMER: Reset the request timer and continue waiting for the
+ * request to complete.
+ */
enum blk_eh_timer_return {
- BLK_EH_DONE, /* drivers has completed the command */
- BLK_EH_RESET_TIMER, /* reset timer and try again */
+ BLK_EH_DONE,
+ BLK_EH_RESET_TIMER,
};
#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
@@ -489,6 +501,8 @@ enum hctx_type {
* @tag_list_lock: Serializes tag_list accesses.
* @tag_list: List of the request queues that use this tag set. See also
* request_queue.tag_set_list.
+ * @srcu: Used as a lock when the request queue type is blocking
+ * (BLK_MQ_F_BLOCKING).
*/
struct blk_mq_tag_set {
struct blk_mq_queue_map map[HCTX_MAX_TYPES];
@@ -509,6 +523,7 @@ struct blk_mq_tag_set {
struct mutex tag_list_lock;
struct list_head tag_list;
+ struct srcu_struct *srcu;
};
/**
@@ -630,7 +645,7 @@ struct blk_mq_ops {
* @map_queues: This allows drivers specify their own queue mapping by
* overriding the setup-time function that builds the mq_map.
*/
- int (*map_queues)(struct blk_mq_tag_set *set);
+ void (*map_queues)(struct blk_mq_tag_set *set);
#ifdef CONFIG_BLK_DEBUG_FS
/**
@@ -841,8 +856,10 @@ static inline bool blk_mq_add_to_batch(struct request *req,
struct io_comp_batch *iob, int ioerror,
void (*complete)(struct io_comp_batch *))
{
- if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror)
+ if (!iob || (req->rq_flags & RQF_ELV) || ioerror ||
+ (req->end_io && !blk_rq_is_passthrough(req)))
return false;
+
if (!iob->complete)
iob->complete = complete;
else if (iob->complete != complete)
@@ -857,7 +874,6 @@ void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
void blk_mq_complete_request(struct request *rq);
bool blk_mq_complete_request_remote(struct request *rq);
-bool blk_mq_queue_stopped(struct request_queue *q);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
@@ -865,7 +881,9 @@ void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
-void blk_mq_wait_quiesce_done(struct request_queue *q);
+void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set);
+void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set);
+void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
@@ -881,7 +899,7 @@ void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
-int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
+void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
void blk_mq_quiesce_queue_nowait(struct request_queue *q);
@@ -964,15 +982,17 @@ blk_status_t blk_insert_cloned_request(struct request *rq);
struct rq_map_data {
struct page **pages;
- int page_order;
- int nr_entries;
unsigned long offset;
- int null_mapped;
- int from_user;
+ unsigned short page_order;
+ unsigned short nr_entries;
+ bool null_mapped;
+ bool from_user;
};
int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long, gfp_t);
+int blk_rq_map_user_io(struct request *, struct rq_map_data *,
+ void __user *, unsigned long, gfp_t, bool, int, bool, int);
int blk_rq_map_user_iov(struct request_queue *, struct request *,
struct rq_map_data *, const struct iov_iter *, gfp_t);
int blk_rq_unmap_user(struct bio *);
@@ -981,6 +1001,7 @@ int blk_rq_map_kern(struct request_queue *, struct request *, void *,
int blk_rq_append_bio(struct request *rq, struct bio *bio);
void blk_execute_rq_nowait(struct request *rq, bool at_head);
blk_status_t blk_execute_rq(struct request *rq, bool at_head);
+bool blk_rq_is_poll(struct request *rq);
struct req_iterator {
struct bvec_iter iter;
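
The new rq_end_io_ret return value lets an ->end_io() callback tell blk-mq whether it still owns the request or wants it freed. A minimal illustrative completion handler for a driver-allocated request; the names (example_cmd, example_end_io) are hypothetical:

#include <linux/blk-mq.h>
#include <linux/completion.h>

struct example_cmd {
        struct completion done;
        blk_status_t status;
};

static enum rq_end_io_ret example_end_io(struct request *rq, blk_status_t err)
{
        struct example_cmd *cmd = rq->end_io_data;

        cmd->status = err;
        complete(&cmd->done);

        /* The request is no longer needed; ask blk-mq to free it. */
        return RQ_END_IO_FREE;
}
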
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1ef99790f6ed..99be590f952f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -321,11 +321,10 @@ enum {
BIO_NO_PAGE_REF, /* don't put release vec pages */
BIO_CLONED, /* doesn't own data */
BIO_BOUNCED, /* bio is a bounce bio */
- BIO_WORKINGSET, /* contains userspace workingset pages */
BIO_QUIET, /* Make BIO Quiet */
BIO_CHAIN, /* chained bio, ->bi_remaining in effect */
BIO_REFFED, /* bio has elevated ->bi_cnt */
- BIO_THROTTLED, /* This bio has already been subjected to
+ BIO_BPS_THROTTLED, /* This bio has already been subjected to
* throttling rules. Don't do it again. */
BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion
* of this bio. */
@@ -473,13 +472,6 @@ static inline enum req_op bio_op(const struct bio *bio)
return bio->bi_opf & REQ_OP_MASK;
}
-/* obsolete, don't use in new code */
-static inline void bio_set_op_attrs(struct bio *bio, enum req_op op,
- blk_opf_t op_flags)
-{
- bio->bi_opf = op | op_flags;
-}
-
static inline bool op_is_write(blk_opf_t op)
{
return !!(op & (__force blk_opf_t)1);
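
With bio_set_op_attrs() removed, callers assign the operation and flags to ->bi_opf directly (or pass them to bio_alloc()/bio_init()). An equivalent conversion, for illustration only:

#include <linux/bio.h>

static void example_prepare_sync_write(struct bio *bio)
{
        /* Was: bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC); */
        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
}
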
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 84b13fdd34a7..301cf1cf4f2f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -22,7 +22,6 @@
#include <linux/blkzoned.h>
#include <linux/sched.h>
#include <linux/sbitmap.h>
-#include <linux/srcu.h>
#include <linux/uuid.h>
#include <linux/xarray.h>
@@ -156,6 +155,7 @@ struct gendisk {
unsigned open_partitions; /* number of open partitions */
struct backing_dev_info *bdi;
+ struct kobject queue_kobj; /* the queue/ directory */
struct kobject *slave_dir;
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
struct list_head slave_bdevs;
@@ -311,6 +311,13 @@ struct queue_limits {
unsigned char discard_misaligned;
unsigned char raid_partial_stripes_expensive;
enum blk_zoned_model zoned;
+
+ /*
+ * Drivers that set dma_alignment to less than 511 must be prepared to
+ * handle individual bvec's that are not a multiple of a SECTOR_SIZE
+ * due to possible offsets.
+ */
+ unsigned int dma_alignment;
};
typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
@@ -431,10 +438,7 @@ struct request_queue {
struct gendisk *disk;
- /*
- * queue kobject
- */
- struct kobject kobj;
+ refcount_t refs;
/*
* mq queue kobject
@@ -456,12 +460,6 @@ struct request_queue {
unsigned long nr_requests; /* Max # of requests */
unsigned int dma_pad_mask;
- /*
- * Drivers that set dma_alignment to less than 511 must be prepared to
- * handle individual bvec's that are not a multiple of a SECTOR_SIZE
- * due to possible offsets.
- */
- unsigned int dma_alignment;
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct blk_crypto_profile *crypto_profile;
@@ -543,18 +541,11 @@ struct request_queue {
struct mutex debugfs_mutex;
bool mq_sysfs_init_done;
-
- /**
- * @srcu: Sleepable RCU. Use as lock when type of the request queue
- * is blocking (BLK_MQ_F_BLOCKING). Must be the last member
- */
- struct srcu_struct srcu[];
};
/* Keep blk_queue_flag_name[] in sync with the definitions below */
#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */
#define QUEUE_FLAG_DYING 1 /* queue being torn down */
-#define QUEUE_FLAG_HAS_SRCU 2 /* SRCU is allocated */
#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */
@@ -579,10 +570,11 @@ struct request_queue {
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
+#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skips the queue */
+#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skips the queue */
-#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
- (1 << QUEUE_FLAG_SAME_COMP) | \
- (1 << QUEUE_FLAG_NOWAIT))
+#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \
+ (1UL << QUEUE_FLAG_SAME_COMP) | \
+ (1UL << QUEUE_FLAG_NOWAIT))
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
@@ -590,7 +582,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
-#define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags)
#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
@@ -618,8 +609,9 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only)
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
-#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
+#define blk_queue_skip_tagset_quiesce(q) \
+ test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -840,7 +832,6 @@ void set_capacity(struct gendisk *disk, sector_t size);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
-int bd_register_pending_holders(struct gendisk *disk);
#else
static inline int bd_link_disk_holder(struct block_device *bdev,
struct gendisk *disk)
@@ -851,10 +842,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
struct gendisk *disk)
{
}
-static inline int bd_register_pending_holders(struct gendisk *disk)
-{
- return 0;
-}
#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
dev_t part_devt(struct gendisk *disk, u8 partno);
@@ -945,7 +932,6 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
-extern void blk_set_default_limits(struct queue_limits *lim);
extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset);
@@ -1280,6 +1266,11 @@ static inline bool bdev_fua(struct block_device *bdev)
return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags);
}
+static inline bool bdev_nowait(struct block_device *bdev)
+{
+ return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags);
+}
+
static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -1300,6 +1291,15 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
return false;
}
+static inline bool bdev_op_is_zoned_write(struct block_device *bdev,
+ blk_opf_t op)
+{
+ if (!bdev_is_zoned(bdev))
+ return false;
+
+ return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES;
+}
+
static inline sector_t bdev_zone_sectors(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -1311,7 +1311,7 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
static inline int queue_dma_alignment(const struct request_queue *q)
{
- return q ? q->dma_alignment : 511;
+ return q ? q->limits.dma_alignment : 511;
}
static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
@@ -1336,12 +1336,7 @@ static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
/* assumes size > 256 */
static inline unsigned int blksize_bits(unsigned int size)
{
- unsigned int bits = 8;
- do {
- bits++;
- size >>= 1;
- } while (size > 256);
- return bits;
+ return order_base_2(size >> SECTOR_SHIFT) + SECTOR_SHIFT;
}
static inline unsigned int block_size(struct block_device *bdev)
@@ -1400,7 +1395,6 @@ struct block_device_operations {
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
- char *(*devnode)(struct gendisk *disk, umode_t *mode);
/* returns the length of the identifier or a negative errno: */
int (*get_unique_id)(struct gendisk *disk, u8 id[16],
enum blk_unique_id id_type);
@@ -1445,7 +1439,6 @@ unsigned long bdev_start_io_acct(struct block_device *bdev,
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time);
-void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
unsigned long bio_start_io_acct(struct bio *bio);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev);
@@ -1498,6 +1491,7 @@ int sync_blockdev(struct block_device *bdev);
int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
+void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
void printk_all_partitions(void);
#else
static inline void invalidate_bdev(struct block_device *bdev)
@@ -1514,6 +1508,9 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
static inline void sync_bdevs(bool wait)
{
}
+static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+{
+}
static inline void printk_all_partitions(void)
{
}
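
dma_alignment moves from struct request_queue into struct queue_limits while the helpers keep their signatures, presumably so the value can be handled alongside the other queue limits. A hedged sketch; the setup function and the chosen mask are illustrative:

#include <linux/blkdev.h>
#include <linux/bug.h>

static void example_setup_queue(struct request_queue *q)
{
        /* Require 4-byte aligned buffers for DMA; now stored in q->limits. */
        blk_queue_dma_alignment(q, 3);

        /* Readers are unchanged: queue_dma_alignment() consults q->limits. */
        WARN_ON_ONCE(queue_dma_alignment(q) != 3);
}
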
diff --git a/include/linux/bma150.h b/include/linux/bma150.h
index 31c9e323a391..4d4a62d49341 100644
--- a/include/linux/bma150.h
+++ b/include/linux/bma150.h
@@ -33,8 +33,8 @@ struct bma150_cfg {
unsigned char lg_hyst; /* Low-G hysterisis */
unsigned char lg_dur; /* Low-G duration */
unsigned char lg_thres; /* Low-G threshold */
- unsigned char range; /* one of BMA0150_RANGE_xxx */
- unsigned char bandwidth; /* one of BMA0150_BW_xxx */
+ unsigned char range; /* one of BMA150_RANGE_xxx */
+ unsigned char bandwidth; /* one of BMA150_BW_xxx */
};
struct bma150_platform_data {
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2bd1b5f8de9b..57e9e109257e 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -414,6 +414,11 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr,
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
+
+const struct bpf_func_proto *
+cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
+const struct bpf_func_proto *
+cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
#else
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
@@ -444,6 +449,18 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}
+static inline const struct bpf_func_proto *
+cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ return NULL;
+}
+
+static inline const struct bpf_func_proto *
+cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ return NULL;
+}
+
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 20c26aed7896..3de24cfb7a3d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -27,6 +27,7 @@
#include <linux/bpfptr.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
+#include <linux/static_call.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -48,10 +49,13 @@ struct mem_cgroup;
struct module;
struct bpf_func_state;
struct ftrace_ops;
+struct cgroup;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
+extern struct bpf_mem_alloc bpf_global_ma;
+extern bool bpf_global_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -83,7 +87,8 @@ struct bpf_map_ops {
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
- int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr,
+ int (*map_update_batch)(struct bpf_map *map, struct file *map_file,
+ const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
@@ -133,7 +138,7 @@ struct bpf_map_ops {
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
/* Misc helpers.*/
- int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
+ int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags);
/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
@@ -163,38 +168,55 @@ struct bpf_map_ops {
};
enum {
- /* Support at most 8 pointers in a BPF map value */
- BPF_MAP_VALUE_OFF_MAX = 8,
- BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX +
- 1 + /* for bpf_spin_lock */
- 1, /* for bpf_timer */
+ /* Support at most 10 fields in a BTF type */
+ BTF_FIELDS_MAX = 10,
};
-enum bpf_kptr_type {
- BPF_KPTR_UNREF,
- BPF_KPTR_REF,
+enum btf_field_type {
+ BPF_SPIN_LOCK = (1 << 0),
+ BPF_TIMER = (1 << 1),
+ BPF_KPTR_UNREF = (1 << 2),
+ BPF_KPTR_REF = (1 << 3),
+ BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
+ BPF_LIST_HEAD = (1 << 4),
+ BPF_LIST_NODE = (1 << 5),
};
-struct bpf_map_value_off_desc {
+struct btf_field_kptr {
+ struct btf *btf;
+ struct module *module;
+ btf_dtor_kfunc_t dtor;
+ u32 btf_id;
+};
+
+struct btf_field_list_head {
+ struct btf *btf;
+ u32 value_btf_id;
+ u32 node_offset;
+ struct btf_record *value_rec;
+};
+
+struct btf_field {
u32 offset;
- enum bpf_kptr_type type;
- struct {
- struct btf *btf;
- struct module *module;
- btf_dtor_kfunc_t dtor;
- u32 btf_id;
- } kptr;
+ enum btf_field_type type;
+ union {
+ struct btf_field_kptr kptr;
+ struct btf_field_list_head list_head;
+ };
};
-struct bpf_map_value_off {
- u32 nr_off;
- struct bpf_map_value_off_desc off[];
+struct btf_record {
+ u32 cnt;
+ u32 field_mask;
+ int spin_lock_off;
+ int timer_off;
+ struct btf_field fields[];
};
-struct bpf_map_off_arr {
+struct btf_field_offs {
u32 cnt;
- u32 field_off[BPF_MAP_OFF_ARR_MAX];
- u8 field_sz[BPF_MAP_OFF_ARR_MAX];
+ u32 field_off[BTF_FIELDS_MAX];
+ u8 field_sz[BTF_FIELDS_MAX];
};
struct bpf_map {
@@ -212,10 +234,8 @@ struct bpf_map {
u32 max_entries;
u64 map_extra; /* any per-map-type extra fields */
u32 map_flags;
- int spin_lock_off; /* >=0 valid offset, <0 error */
- struct bpf_map_value_off *kptr_off_tab;
- int timer_off; /* >=0 valid offset, <0 error */
u32 id;
+ struct btf_record *record;
int numa_node;
u32 btf_key_type_id;
u32 btf_value_type_id;
@@ -225,7 +245,7 @@ struct bpf_map {
struct obj_cgroup *objcg;
#endif
char name[BPF_OBJ_NAME_LEN];
- struct bpf_map_off_arr *off_arr;
+ struct btf_field_offs *field_offs;
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
@@ -249,58 +269,171 @@ struct bpf_map {
bool frozen; /* write-once; write-protected by freeze_mutex */
};
-static inline bool map_value_has_spin_lock(const struct bpf_map *map)
-{
- return map->spin_lock_off >= 0;
+static inline const char *btf_field_type_name(enum btf_field_type type)
+{
+ switch (type) {
+ case BPF_SPIN_LOCK:
+ return "bpf_spin_lock";
+ case BPF_TIMER:
+ return "bpf_timer";
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ return "kptr";
+ case BPF_LIST_HEAD:
+ return "bpf_list_head";
+ case BPF_LIST_NODE:
+ return "bpf_list_node";
+ default:
+ WARN_ON_ONCE(1);
+ return "unknown";
+ }
}
-static inline bool map_value_has_timer(const struct bpf_map *map)
+static inline u32 btf_field_type_size(enum btf_field_type type)
+{
+ switch (type) {
+ case BPF_SPIN_LOCK:
+ return sizeof(struct bpf_spin_lock);
+ case BPF_TIMER:
+ return sizeof(struct bpf_timer);
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ return sizeof(u64);
+ case BPF_LIST_HEAD:
+ return sizeof(struct bpf_list_head);
+ case BPF_LIST_NODE:
+ return sizeof(struct bpf_list_node);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+static inline u32 btf_field_type_align(enum btf_field_type type)
+{
+ switch (type) {
+ case BPF_SPIN_LOCK:
+ return __alignof__(struct bpf_spin_lock);
+ case BPF_TIMER:
+ return __alignof__(struct bpf_timer);
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ return __alignof__(u64);
+ case BPF_LIST_HEAD:
+ return __alignof__(struct bpf_list_head);
+ case BPF_LIST_NODE:
+ return __alignof__(struct bpf_list_node);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type)
{
- return map->timer_off >= 0;
+ if (IS_ERR_OR_NULL(rec))
+ return false;
+ return rec->field_mask & type;
}
-static inline bool map_value_has_kptrs(const struct bpf_map *map)
+static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj)
{
- return !IS_ERR_OR_NULL(map->kptr_off_tab);
+ int i;
+
+ if (!foffs)
+ return;
+ for (i = 0; i < foffs->cnt; i++)
+ memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]);
}
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
- if (unlikely(map_value_has_spin_lock(map)))
- memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
- if (unlikely(map_value_has_timer(map)))
- memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
- if (unlikely(map_value_has_kptrs(map))) {
- struct bpf_map_value_off *tab = map->kptr_off_tab;
- int i;
+ bpf_obj_init(map->field_offs, dst);
+}
- for (i = 0; i < tab->nr_off; i++)
- *(u64 *)(dst + tab->off[i].offset) = 0;
+/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
+ * forced to use 'long' read/writes to try to atomically copy long counters.
+ * Best-effort only. No barriers here, since it _will_ race with concurrent
+ * updates from BPF programs. Called from bpf syscall and mostly used with
+ * size 8 or 16 bytes, so ask compiler to inline it.
+ */
+static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
+{
+ const long *lsrc = src;
+ long *ldst = dst;
+
+ size /= sizeof(long);
+ while (size--)
+ *ldst++ = *lsrc++;
+}
+
+/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
+static inline void bpf_obj_memcpy(struct btf_field_offs *foffs,
+ void *dst, void *src, u32 size,
+ bool long_memcpy)
+{
+ u32 curr_off = 0;
+ int i;
+
+ if (likely(!foffs)) {
+ if (long_memcpy)
+ bpf_long_memcpy(dst, src, round_up(size, 8));
+ else
+ memcpy(dst, src, size);
+ return;
+ }
+
+ for (i = 0; i < foffs->cnt; i++) {
+ u32 next_off = foffs->field_off[i];
+ u32 sz = next_off - curr_off;
+
+ memcpy(dst + curr_off, src + curr_off, sz);
+ curr_off += foffs->field_sz[i] + sz;
}
+ memcpy(dst + curr_off, src + curr_off, size - curr_off);
}
-/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
+ bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false);
+}
+
+static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src)
+{
+ bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true);
+}
+
+static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size)
+{
u32 curr_off = 0;
int i;
- if (likely(!map->off_arr)) {
- memcpy(dst, src, map->value_size);
+ if (likely(!foffs)) {
+ memset(dst, 0, size);
return;
}
- for (i = 0; i < map->off_arr->cnt; i++) {
- u32 next_off = map->off_arr->field_off[i];
+ for (i = 0; i < foffs->cnt; i++) {
+ u32 next_off = foffs->field_off[i];
+ u32 sz = next_off - curr_off;
- memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
- curr_off += map->off_arr->field_sz[i];
+ memset(dst + curr_off, 0, sz);
+ curr_off += foffs->field_sz[i] + sz;
}
- memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off);
+ memset(dst + curr_off, 0, size - curr_off);
}
+
+static inline void zero_map_value(struct bpf_map *map, void *dst)
+{
+ bpf_obj_memzero(map->field_offs, dst, map->value_size);
+}
+
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
+void bpf_list_head_free(const struct btf_field *field, void *list_head,
+ struct bpf_spin_lock *spin_lock);
+
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
@@ -369,10 +502,8 @@ enum bpf_type_flag {
*/
MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS),
- /* MEM was "allocated" from a different helper, and cannot be mixed
- * with regular non-MEM_ALLOC'ed MEM types.
- */
- MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS),
+ /* MEM points to BPF ring buffer reservation. */
+ MEM_RINGBUF = BIT(2 + BPF_BASE_TYPE_BITS),
/* MEM is in user address space. */
MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS),
@@ -401,12 +532,49 @@ enum bpf_type_flag {
/* DYNPTR points to memory local to the bpf program. */
DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS),
- /* DYNPTR points to a ringbuf record. */
+ /* DYNPTR points to a kernel-produced ringbuf record. */
DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS),
/* Size is known at compile time. */
MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+ /* MEM is of an allocated object of type in program BTF. This is used to
+ * tag PTR_TO_BTF_ID allocated using bpf_obj_new.
+ */
+ MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS),
+
+ /* PTR was passed from the kernel in a trusted context, and may be
+ * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions.
+ * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above.
+ * PTR_UNTRUSTED refers to a kptr that was read directly from a map
+ * without invoking bpf_kptr_xchg(). What we really need to know is
+ * whether a pointer is safe to pass to a kfunc or BPF helper function.
+ * While PTR_UNTRUSTED pointers are unsafe to pass to kfuncs and BPF
+ * helpers, they do not cover all possible instances of unsafe
+ * pointers. For example, a pointer that was obtained from walking a
+ * struct will _not_ get the PTR_UNTRUSTED type modifier, despite the
+ * fact that it may be NULL, invalid, etc. This is due to backwards
+ * compatibility requirements, as this was the behavior that was first
+ * introduced when kptrs were added. The behavior is now considered
+ * deprecated, and PTR_UNTRUSTED will eventually be removed.
+ *
+ * PTR_TRUSTED, on the other hand, is a pointer that the kernel
+ * guarantees to be valid and safe to pass to kfuncs and BPF helpers.
+ * For example, pointers passed to tracepoint arguments are considered
+ * PTR_TRUSTED, as are pointers that are passed to struct_ops
+ * callbacks. As alluded to above, pointers that are obtained from
+ * walking PTR_TRUSTED pointers are _not_ trusted. For example, if a
+ * struct task_struct *task is PTR_TRUSTED, then accessing
+ * task->last_wakee will lose the PTR_TRUSTED modifier when it's stored
+ * in a BPF register. Similarly, pointers passed to certain program
+ * types such as kretprobes are not guaranteed to be valid, as they may
+ * for example contain an object that was recently freed.
+ */
+ PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS),
+
+ /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
+ MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
@@ -446,7 +614,7 @@ enum bpf_arg_type {
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
- ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
+ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
@@ -463,7 +631,6 @@ enum bpf_arg_type {
ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
- ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
/* pointer to memory does not need to be initialized, helper function must fill
@@ -488,7 +655,7 @@ enum bpf_return_type {
RET_PTR_TO_SOCKET, /* returns a pointer to a socket */
RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */
RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */
- RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */
+ RET_PTR_TO_MEM, /* returns a pointer to memory */
RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */
__BPF_RET_TYPE_MAX,
@@ -498,9 +665,10 @@ enum bpf_return_type {
RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
- RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
- RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
+ RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM,
+ RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM,
RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+ RET_PTR_TO_BTF_ID_TRUSTED = PTR_TRUSTED | RET_PTR_TO_BTF_ID,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
@@ -517,6 +685,7 @@ struct bpf_func_proto {
u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
bool gpl_only;
bool pkt_access;
+ bool might_sleep;
enum bpf_return_type ret_type;
union {
struct {
@@ -606,6 +775,7 @@ enum bpf_reg_type {
PTR_TO_MEM, /* reg points to valid memory region */
PTR_TO_BUF, /* reg points to a read/write buffer */
PTR_TO_FUNC, /* reg points to a bpf program function */
+ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
__BPF_REG_TYPE_MAX,
/* Extended reg_types. */
@@ -654,6 +824,7 @@ struct bpf_prog_ops {
union bpf_attr __user *uattr);
};
+struct bpf_reg_state;
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
const struct bpf_func_proto *
@@ -675,9 +846,8 @@ struct bpf_verifier_ops {
struct bpf_insn *dst,
struct bpf_prog *prog, u32 *target_size);
int (*btf_struct_access)(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
u32 *next_btf_id, enum bpf_type_flag *flag);
};
@@ -726,10 +896,14 @@ enum bpf_cgroup_storage_type {
*/
#define MAX_BPF_FUNC_REG_ARGS 5
+/* The argument is a structure. */
+#define BTF_FMODEL_STRUCT_ARG BIT(0)
+
struct btf_func_model {
u8 ret_size;
u8 nr_args;
u8 arg_size[MAX_BPF_FUNC_ARGS];
+ u8 arg_flags[MAX_BPF_FUNC_ARGS];
};
/* Restore arguments before returning from trampoline to let original function
@@ -799,18 +973,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
void *orig_call);
-/* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx);
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
- struct bpf_tramp_run_ctx *run_ctx);
-u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
- struct bpf_tramp_run_ctx *run_ctx);
-void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
- struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
+ struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
+ struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
+typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog,
+ struct bpf_tramp_run_ctx *run_ctx);
+typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
+ struct bpf_tramp_run_ctx *run_ctx);
+bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
+bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
struct bpf_ksym {
unsigned long start;
@@ -891,8 +1065,13 @@ struct bpf_dispatcher {
struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX];
int num_progs;
void *image;
+ void *rw_image;
u32 image_off;
struct bpf_ksym ksym;
+#ifdef CONFIG_HAVE_STATIC_CALL
+ struct static_call_key *sc_key;
+ void *sc_tramp;
+#endif
};
static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
@@ -909,7 +1088,35 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin
struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info);
void bpf_trampoline_put(struct bpf_trampoline *tr);
-int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
+int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs);
+
+/*
+ * When the architecture supports STATIC_CALL, replace the bpf_dispatcher_fn
+ * indirection with a direct call to the bpf program. If the architecture does
+ * not have STATIC_CALL, avoid a double-indirection.
+ */
+#ifdef CONFIG_HAVE_STATIC_CALL
+
+#define __BPF_DISPATCHER_SC_INIT(_name) \
+ .sc_key = &STATIC_CALL_KEY(_name), \
+ .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name),
+
+#define __BPF_DISPATCHER_SC(name) \
+ DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func)
+
+#define __BPF_DISPATCHER_CALL(name) \
+ static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func)
+
+#define __BPF_DISPATCHER_UPDATE(_d, _new) \
+ __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new))
+
+#else
+#define __BPF_DISPATCHER_SC_INIT(name)
+#define __BPF_DISPATCHER_SC(name)
+#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi)
+#define __BPF_DISPATCHER_UPDATE(_d, _new)
+#endif
+
#define BPF_DISPATCHER_INIT(_name) { \
.mutex = __MUTEX_INITIALIZER(_name.mutex), \
.func = &_name##_func, \
@@ -921,31 +1128,34 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
.name = #_name, \
.lnode = LIST_HEAD_INIT(_name.ksym.lnode), \
}, \
+ __BPF_DISPATCHER_SC_INIT(_name##_call) \
}
#define DEFINE_BPF_DISPATCHER(name) \
+ __BPF_DISPATCHER_SC(name); \
noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
bpf_func_t bpf_func) \
{ \
- return bpf_func(ctx, insnsi); \
+ return __BPF_DISPATCHER_CALL(name); \
} \
EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \
struct bpf_dispatcher bpf_dispatcher_##name = \
BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
+
#define DECLARE_BPF_DISPATCHER(name) \
unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
bpf_func_t bpf_func); \
extern struct bpf_dispatcher bpf_dispatcher_##name;
+
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
/* Called only from JIT-enabled code, so there's no need for stubs. */
-void *bpf_jit_alloc_exec_page(void);
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
@@ -1333,6 +1543,11 @@ struct bpf_array {
#define BPF_MAP_CAN_READ BIT(0)
#define BPF_MAP_CAN_WRITE BIT(1)
+/* Maximum number of user-producer ring buffer samples that can be drained in
+ * a call to bpf_user_ringbuf_drain().
+ */
+#define BPF_MAX_USER_RINGBUF_SAMPLES (128 * 1024)
+
static inline u32 bpf_map_flags_to_cap(struct bpf_map *map)
{
u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
@@ -1620,11 +1835,14 @@ void bpf_prog_put(struct bpf_prog *prog);
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
-struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset);
-void bpf_map_free_kptr_off_tab(struct bpf_map *map);
-struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map);
-bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);
-void bpf_map_free_kptrs(struct bpf_map *map, void *map_value);
+struct btf_field *btf_record_find(const struct btf_record *rec,
+ u32 offset, enum btf_field_type type);
+void btf_record_free(struct btf_record *rec);
+void bpf_map_free_record(struct bpf_map *map);
+struct btf_record *btf_record_dup(const struct btf_record *rec);
+bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
+void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
+void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
@@ -1642,7 +1860,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
int generic_map_lookup_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
-int generic_map_update_batch(struct bpf_map *map,
+int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
int generic_map_delete_batch(struct bpf_map *map,
@@ -1691,11 +1909,6 @@ static inline bool bpf_allow_uninit_stack(void)
return perfmon_capable();
}
-static inline bool bpf_allow_ptr_to_map_access(void)
-{
- return perfmon_capable();
-}
-
static inline bool bpf_bypass_spec_v1(void)
{
return perfmon_capable();
@@ -1729,8 +1942,40 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
extern int bpf_iter_ ## target(args); \
int __init bpf_iter_ ## target(args) { return 0; }
+/*
+ * The task type of iterators.
+ *
+ * BPF task iterators can be parameterized to visit only a subset of
+ * tasks.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ * Iterate over resources of every task.
+ *
+ * BPF_TASK_ITER_TID
+ * Iterate over resources of a task/tid.
+ *
+ * BPF_TASK_ITER_TGID
+ * Iterate over resources of every task of a process / task group.
+ */
+enum bpf_iter_task_type {
+ BPF_TASK_ITER_ALL = 0,
+ BPF_TASK_ITER_TID,
+ BPF_TASK_ITER_TGID,
+};
+
struct bpf_iter_aux_info {
+ /* for map_elem iter */
struct bpf_map *map;
+
+ /* for cgroup iter */
+ struct {
+ struct cgroup *start; /* starting cgroup */
+ enum bpf_cgroup_iter_order order;
+ } cgroup;
+ struct {
+ enum bpf_iter_task_type type;
+ u32 pid;
+ } task;
};
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
@@ -1815,22 +2060,6 @@ int bpf_get_file_flag(int flags);
int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
size_t actual_size);
-/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
- * forced to use 'long' read/writes to try to atomically copy long counters.
- * Best-effort only. No barriers here, since it _will_ race with concurrent
- * updates from BPF programs. Called from bpf syscall and mostly used with
- * size 8 or 16 bytes, so ask compiler to inline it.
- */
-static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
-{
- const long *lsrc = src;
- long *ldst = dst;
-
- size /= sizeof(long);
- while (size--)
- *ldst++ = *lsrc++;
-}
-
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr);
@@ -1917,9 +2146,9 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size,
return btf_ctx_access(off, size, type, prog, info);
}
-int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype,
+int btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
u32 *next_btf_id, enum bpf_type_flag *flag);
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
@@ -1935,10 +2164,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
struct bpf_reg_state;
int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
-int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
- const struct btf *btf, u32 func_id,
- struct bpf_reg_state *regs,
- u32 kfunc_flags);
+int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
+ struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *reg);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
@@ -1949,6 +2176,7 @@ struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
void bpf_task_storage_free(struct task_struct *task);
+void bpf_cgrp_storage_free(struct cgroup *cgroup);
bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
@@ -1966,6 +2194,17 @@ static inline bool unprivileged_ebpf_enabled(void)
return !sysctl_unprivileged_bpf_disabled;
}
+/* Not all bpf prog types have the bpf_ctx.
+ * For the bpf prog types that have initialized the bpf_ctx,
+ * this function can be used to decide if a kernel function
+ * is called by a bpf program.
+ */
+static inline bool has_current_bpf_ctx(void)
+{
+ return !!current->bpf_ctx;
+}
+
+void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2148,6 +2387,14 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id)
return ERR_PTR(-ENOTSUPP);
}
+static inline int btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
+{
+ return -EACCES;
+}
+
static inline const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -2175,6 +2422,18 @@ static inline bool unprivileged_ebpf_enabled(void)
return false;
}
+static inline bool has_current_bpf_ctx(void)
+{
+ return false;
+}
+
+static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog)
+{
+}
+
+static inline void bpf_cgrp_storage_free(struct cgroup *cgroup)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -2349,6 +2608,7 @@ extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
extern const struct bpf_func_proto bpf_tail_call_proto;
extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto;
+extern const struct bpf_func_proto bpf_ktime_get_tai_ns_proto;
extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
@@ -2361,6 +2621,7 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto;
extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
@@ -2397,7 +2658,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_recur_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
@@ -2410,6 +2673,9 @@ extern const struct bpf_func_proto bpf_loop_proto;
extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
extern const struct bpf_func_proto bpf_set_retval_proto;
extern const struct bpf_func_proto bpf_get_retval_proto;
+extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto;
+extern const struct bpf_func_proto bpf_cgrp_storage_get_proto;
+extern const struct bpf_func_proto bpf_cgrp_storage_delete_proto;
const struct bpf_func_proto *tracing_prog_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -2554,7 +2820,7 @@ enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_INVALID,
/* Points to memory that is local to the bpf program */
BPF_DYNPTR_TYPE_LOCAL,
- /* Underlying data is a ringbuf record */
+ /* Underlying data is a kernel-produced ringbuf record */
BPF_DYNPTR_TYPE_RINGBUF,
};
@@ -2562,6 +2828,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
int bpf_dynptr_check_size(u32 size);
+u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
@@ -2571,4 +2838,18 @@ static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {}
static inline void bpf_cgroup_atype_put(int cgroup_atype) {}
#endif /* CONFIG_BPF_LSM */
+struct key;
+
+#ifdef CONFIG_KEYS
+struct bpf_key {
+ struct key *key;
+ bool has_ref;
+};
+#endif /* CONFIG_KEYS */
+
+static inline bool type_is_alloc(u32 type)
+{
+ return type & MEM_ALLOC;
+}
+
#endif /* _LINUX_BPF_H */
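
bpf.h replaces the special-cased spin_lock/timer/kptr offsets with a generic btf_record that describes every special field of a map value. A hedged sketch of how the new inline helpers combine; the caller example_inspect_map_record is hypothetical:

#include <linux/bpf.h>
#include <linux/printk.h>

static void example_inspect_map_record(const struct bpf_map *map)
{
        const struct btf_record *rec = map->record;

        /* field_mask turns "does this value contain a bpf_spin_lock?" into a
         * single bit test, regardless of how many fields are recorded.
         */
        if (btf_record_has_field(rec, BPF_SPIN_LOCK))
                pr_debug("map %s: has a %s field\n", map->name,
                         btf_field_type_name(BPF_SPIN_LOCK));
}
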
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 7ea18d4da84b..6d37a40cd90e 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -116,21 +116,22 @@ static struct bpf_local_storage_cache name = { \
.idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock), \
}
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
-void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
- u16 idx);
-
/* Helper functions for bpf_local_storage */
int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
-struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+struct bpf_map *
+bpf_local_storage_map_alloc(union bpf_attr *attr,
+ struct bpf_local_storage_cache *cache);
struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
bool cacheit_lockit);
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage);
+
+void bpf_local_storage_map_free(struct bpf_map *map,
+ struct bpf_local_storage_cache *cache,
int __percpu *busy_counter);
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
@@ -141,10 +142,6 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem);
-bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_elem *selem,
- bool uncharge_omem, bool use_trace_rcu);
-
void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu);
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 4bcf76a9bb06..1de7ece5d36d 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -28,6 +28,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog);
bool bpf_lsm_is_sleepable_hook(u32 btf_id);
+bool bpf_lsm_is_trusted(const struct bpf_prog *prog);
static inline struct bpf_storage_blob *bpf_inode(
const struct inode *inode)
@@ -51,6 +52,11 @@ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
return false;
}
+static inline bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
+{
+ return false;
+}
+
static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
{
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
new file mode 100644
index 000000000000..3e164b8efaa9
--- /dev/null
+++ b/include/linux/bpf_mem_alloc.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#ifndef _BPF_MEM_ALLOC_H
+#define _BPF_MEM_ALLOC_H
+#include <linux/compiler_types.h>
+#include <linux/workqueue.h>
+
+struct bpf_mem_cache;
+struct bpf_mem_caches;
+
+struct bpf_mem_alloc {
+ struct bpf_mem_caches __percpu *caches;
+ struct bpf_mem_cache __percpu *cache;
+ struct work_struct work;
+};
+
+int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu);
+void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
+
+/* kmalloc/kfree equivalent: */
+void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
+void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
+
+/* kmem_cache_alloc/free equivalent: */
+void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
+void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
+
+#endif /* _BPF_MEM_ALLOC_H */
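
The new header exposes the BPF-specific allocator in two modes: passing size 0 to bpf_mem_alloc_init() gives the kmalloc-style bpf_mem_alloc()/bpf_mem_free() pair with per-size caches, while a non-zero size gives the kmem_cache-style pair. A minimal usage sketch; the element type and caller are illustrative, the real users live in kernel/bpf/:

#include <linux/bpf_mem_alloc.h>
#include <linux/types.h>

struct example_elem {
        u64 key;
        u64 value;
};

static int example_use_allocator(void)
{
        struct bpf_mem_alloc ma;
        struct example_elem *e;
        int err;

        /* size == 0: kmalloc-style mode, sizes passed per allocation. */
        err = bpf_mem_alloc_init(&ma, 0, false);
        if (err)
                return err;

        e = bpf_mem_alloc(&ma, sizeof(*e));
        if (e) {
                e->key = 1;
                e->value = 2;
                bpf_mem_free(&ma, e);
        }

        bpf_mem_alloc_destroy(&ma);
        return 0;
}
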
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2b9112b80171..d4ee3ccd3753 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -86,6 +86,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
#ifdef CONFIG_CGROUPS
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_CGRP_STORAGE, cgrp_storage_map_ops)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
@@ -126,6 +127,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 2e3bad8640dc..53d175cbaa02 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -19,7 +19,7 @@
*/
#define BPF_MAX_VAR_SIZ (1 << 29)
/* size of type_str_buf in bpf_verifier. */
-#define TYPE_STR_BUF_LEN 64
+#define TYPE_STR_BUF_LEN 128
/* Liveness marks, used for registers and spilled-regs (in stack slots).
* Read marks propagate upwards until they find a write mark; they record that
@@ -212,6 +212,22 @@ struct bpf_reference_state {
* is used purely to inform the user of a reference leak.
*/
int insn_idx;
+ /* There can be a case like:
+ * main (frame 0)
+ * cb (frame 1)
+ * func (frame 3)
+ * cb (frame 4)
+ * Hence for frame 4, if callback_ref just stored boolean, it would be
+ * impossible to distinguish nested callback refs. Hence store the
+ * frameno and compare that to callback_ref in check_reference_leak when
+ * exiting a callback function.
+ */
+ int callback_ref;
+ /* Mark the reference state to release the registers sharing the same id
+ * on bpf_spin_unlock (for nodes that we will lose ownership to but are
+ * safe to access inside the critical section).
+ */
+ bool release_on_unlock;
};
/* state of the program:
@@ -237,6 +253,7 @@ struct bpf_func_state {
*/
u32 async_entry_cnt;
bool in_callback_fn;
+ struct tnum callback_ret_range;
bool in_async_callback_fn;
/* The following fields should be last. See copy_func_state() */
@@ -256,9 +273,9 @@ struct bpf_id_pair {
u32 cur;
};
-/* Maximum number of register states that can exist at once */
-#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
#define MAX_CALL_FRAMES 8
+/* Maximum number of register states that can exist at once */
+#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
struct bpf_verifier_state {
/* call stack tracking */
struct bpf_func_state *frame[MAX_CALL_FRAMES];
@@ -311,8 +328,23 @@ struct bpf_verifier_state {
u32 branches;
u32 insn_idx;
u32 curframe;
- u32 active_spin_lock;
+ /* For every reg representing a map value or allocated object pointer,
+ * we consider the tuple of (ptr, id) for them to be unique in verifier
+ * context and consider them to not alias each other for the purposes of
+ * tracking lock state.
+ */
+ struct {
+ /* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
+ * there's no active lock held, and other fields have no
+ * meaning. If non-NULL, it indicates that a lock is held and
+ * id member has the reg->id of the register which can be >= 0.
+ */
+ void *ptr;
+ /* This will be reg->id */
+ u32 id;
+ } active_lock;
bool speculative;
+ bool active_rcu_lock;
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
@@ -337,6 +369,27 @@ struct bpf_verifier_state {
iter < frame->allocated_stack / BPF_REG_SIZE; \
iter++, reg = bpf_get_spilled_reg(iter, frame))
+/* Invoke __expr over registers in __vst, setting __state and __reg */
+#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \
+ ({ \
+ struct bpf_verifier_state *___vstate = __vst; \
+ int ___i, ___j; \
+ for (___i = 0; ___i <= ___vstate->curframe; ___i++) { \
+ struct bpf_reg_state *___regs; \
+ __state = ___vstate->frame[___i]; \
+ ___regs = __state->regs; \
+ for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \
+ __reg = &___regs[___j]; \
+ (void)(__expr); \
+ } \
+ bpf_for_each_spilled_reg(___j, __state, __reg) { \
+ if (!__reg) \
+ continue; \
+ (void)(__expr); \
+ } \
+ } \
+ })
+
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
@@ -386,16 +439,20 @@ struct bpf_insn_aux_data {
*/
struct bpf_loop_inline_state loop_inline_state;
};
+ u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */
+ struct btf_struct_meta *kptr_struct_meta;
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
+ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
u8 alu_state; /* used in combination with alu_limit */
/* below fields are initialized once */
unsigned int orig_idx; /* original instruction index */
bool prune_point;
+ bool jmp_point;
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -475,11 +532,11 @@ struct bpf_verifier_env {
bool explore_alu_limits;
bool allow_ptr_leaks;
bool allow_uninit_stack;
- bool allow_ptr_to_map_access;
bool bpf_capable;
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
+ bool rcu_tag_supported;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
@@ -556,10 +613,11 @@ int check_ptr_off_reg(struct bpf_verifier_env *env,
int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type);
-int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- u32 regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
+struct bpf_call_arg_meta;
+int process_dynptr_func(struct bpf_verifier_env *env, int regno,
+ enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta);
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
@@ -587,6 +645,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
struct bpf_attach_target_info *tgt_info);
void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab);
+int mark_chain_precision(struct bpf_verifier_env *env, int regno);
+
#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0)
/* extract base type from bpf_{arg, return, reg}_type. */
@@ -602,10 +662,30 @@ static inline u32 type_flag(u32 type)
}
/* only use after check_attach_btf_id() */
-static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog)
{
return prog->type == BPF_PROG_TYPE_EXT ?
prog->aux->dst_prog->type : prog->type;
}
+static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
+{
+ switch (resolve_prog_type(prog)) {
+ case BPF_PROG_TYPE_TRACING:
+ return prog->expected_attach_type != BPF_TRACE_ITER;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ case BPF_PROG_TYPE_LSM:
+ return false;
+ default:
+ return true;
+ }
+}
+
+#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
+
+static inline bool bpf_type_has_unsafe_modifiers(u32 type)
+{
+ return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS;
+}
+
#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 6ff567ece34a..9e77165f3ef6 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -293,6 +293,7 @@
#define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */
#define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */
+#define MII_BRCM_FET_SHDW_AM4_STANDBY 0x0008 /* Standby enable */
#define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003
#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001
diff --git a/include/linux/btf.h b/include/linux/btf.h
index cdb376d53238..5f628f323442 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -6,6 +6,8 @@
#include <linux/types.h>
#include <linux/bpfptr.h>
+#include <linux/bsearch.h>
+#include <linux/btf_ids.h>
#include <uapi/linux/btf.h>
#include <uapi/linux/bpf.h>
@@ -17,38 +19,67 @@
#define KF_RELEASE (1 << 1) /* kfunc is a release function */
#define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */
#define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */
-/* Trusted arguments are those which are meant to be referenced arguments with
- * unchanged offset. It is used to enforce that pointers obtained from acquire
- * kfuncs remain unmodified when being passed to helpers taking trusted args.
+/* Trusted arguments are those which are guaranteed to be valid when passed to
+ * the kfunc. It is used to enforce that pointers obtained from either acquire
+ * kfuncs, or from the main kernel on a tracepoint or struct_ops callback
+ * invocation, remain unmodified when being passed to helpers taking trusted
+ * args.
*
- * Consider
- * struct foo {
- * int data;
- * struct foo *next;
- * };
+ * Consider, for example, the following new task tracepoint:
*
- * struct bar {
- * int data;
- * struct foo f;
- * };
+ * SEC("tp_btf/task_newtask")
+ * int BPF_PROG(new_task_tp, struct task_struct *task, u64 clone_flags)
+ * {
+ * ...
+ * }
*
- * struct foo *f = alloc_foo(); // Acquire kfunc
- * struct bar *b = alloc_bar(); // Acquire kfunc
+ * And the following kfunc:
*
- * If a kfunc set_foo_data() wants to operate only on the allocated object, it
- * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like:
+ * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
*
- * set_foo_data(f, 42); // Allowed
- * set_foo_data(f->next, 42); // Rejected, non-referenced pointer
- * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type
- * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset
+ * All invocations to the kfunc must pass the unmodified, unwalked task:
*
- * In the final case, usually for the purposes of type matching, it is deduced
- * by looking at the type of the member at the offset, but due to the
- * requirement of trusted argument, this deduction will be strict and not done
- * for this case.
+ * bpf_task_acquire(task); // Allowed
+ * bpf_task_acquire(task->last_wakee); // Rejected, walked task
+ *
+ * Programs may also pass referenced tasks directly to the kfunc:
+ *
+ * struct task_struct *acquired;
+ *
+ * acquired = bpf_task_acquire(task); // Allowed, same as above
+ * bpf_task_acquire(acquired); // Allowed
+ * bpf_task_acquire(task); // Allowed
+ * bpf_task_acquire(acquired->last_wakee); // Rejected, walked task
+ *
+ * Programs may _not_, however, pass a task from an arbitrary fentry/fexit, or
+ * kprobe/kretprobe to the kfunc, as BPF cannot guarantee that all of these
+ * pointers are safe. For example, the following BPF program
+ * would be rejected:
+ *
+ * SEC("kretprobe/free_task")
+ * int BPF_PROG(free_task_probe, struct task_struct *tsk)
+ * {
+ * struct task_struct *acquired;
+ *
+ *	acquired = bpf_task_acquire(tsk); // Rejected, not a trusted pointer
+ * bpf_task_release(acquired);
+ *
+ * return 0;
+ * }
*/
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
+#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
+#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
+#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */
+
+/*
+ * Return the name of the passed struct, if it exists, or halt the build if,
+ * for example, the structure gets renamed. This forces developers to revisit
+ * the code using that structure name and update it accordingly.
+ */
+#define stringify_struct(x) \
+ ({ BUILD_BUG_ON(sizeof(struct x) < 0); \
+ __stringify(x); })
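A minimal sketch of the intended use (report_type() is illustrative): because the macro evaluates sizeof(struct x), renaming or removing the struct turns the string into a build error instead of a stale literal.

static void report_type(void)
{
	/* Expands to "task_struct"; fails to compile if the struct is renamed. */
	pr_info("tracking %s\n", stringify_struct(task_struct));
}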
struct btf;
struct btf_member;
@@ -67,6 +98,17 @@ struct btf_id_dtor_kfunc {
u32 kfunc_btf_id;
};
+struct btf_struct_meta {
+ u32 btf_id;
+ struct btf_record *record;
+ struct btf_field_offs *field_offs;
+};
+
+struct btf_struct_metas {
+ u32 cnt;
+ struct btf_struct_meta types[];
+};
+
typedef void (*btf_dtor_kfunc_t)(void *);
extern const struct file_operations btf_fops;
@@ -152,8 +194,10 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
-struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
- const struct btf_type *t);
+struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
+ u32 field_mask, u32 value_size);
+int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec);
+struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
@@ -277,6 +321,11 @@ static inline bool btf_type_is_typedef(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
}
+static inline bool btf_type_is_volatile(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_VOLATILE;
+}
+
static inline bool btf_type_is_func(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
@@ -307,6 +356,16 @@ static inline bool btf_type_is_struct(const struct btf_type *t)
return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
}
+static inline bool __btf_type_is_struct(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
+}
+
+static inline bool btf_type_is_array(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
+}
+
static inline u16 btf_type_vlen(const struct btf_type *t)
{
return BTF_INFO_VLEN(t->info);
@@ -391,9 +450,27 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
return (struct btf_param *)(t + 1);
}
-#ifdef CONFIG_BPF_SYSCALL
+static inline int btf_id_cmp_func(const void *a, const void *b)
+{
+ const int *pa = a, *pb = b;
+
+ return *pa - *pb;
+}
+
+static inline bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
+{
+ return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
+}
+
+static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
+{
+ return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
+}
+
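These helpers assume the ID arrays are already sorted (resolve_btfids sorts BTF_SET sections at build time); a hedged sketch of membership testing against such a set:

BTF_SET_START(example_allowed_structs)
BTF_ID(struct, task_struct)
BTF_SET_END(example_allowed_structs)

static bool example_id_allowed(u32 btf_id)
{
	/* bsearch-based lookup; only valid on a sorted set */
	return btf_id_set_contains(&example_allowed_structs, btf_id);
}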
struct bpf_prog;
+struct bpf_verifier_log;
+#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
@@ -401,11 +478,21 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
enum bpf_prog_type prog_type,
u32 kfunc_btf_id);
+u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
const struct btf_kfunc_id_set *s);
+int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset);
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner);
+struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
+const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg);
+int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type);
+bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
@@ -437,6 +524,36 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt
{
return 0;
}
+static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
+{
+ return NULL;
+}
+static inline const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg)
+{
+ return NULL;
+}
+static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log,
+ enum bpf_prog_type prog_type) {
+ return -EINVAL;
+}
+static inline bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2)
+{
+ return false;
+}
#endif
+static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t)
+{
+ if (!btf_type_is_ptr(t))
+ return false;
+
+ t = btf_type_skip_modifiers(btf, t->type, NULL);
+
+ return btf_type_is_struct(t);
+}
+
#endif
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 2aea877d644f..3a4f7cd882ca 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -204,7 +204,7 @@ extern struct btf_id_set8 name;
#else
-#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[16];
#define BTF_ID(prefix, name)
#define BTF_ID_FLAGS(prefix, name, ...)
#define BTF_ID_UNUSED
@@ -265,5 +265,7 @@ MAX_BTF_TRACING_TYPE,
};
extern u32 btf_tracing_ids[];
+extern u32 bpf_cgroup_btf_id[];
+extern u32 bpf_local_storage_map_btf_id[];
#endif
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index def8b8d30ccc..33fa5e94aa80 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -138,6 +138,17 @@ BUFFER_FNS(Defer_Completion, defer_completion)
static __always_inline void set_buffer_uptodate(struct buffer_head *bh)
{
/*
+ * If somebody else already set this uptodate, they will
+ * have done the memory barrier, and a reader will thus
+ * see *some* valid buffer state.
+ *
+ * Any other serialization (with IO errors or whatever that
+ * might clear the bit) has to come from other state (eg BH_Lock).
+ */
+ if (test_bit(BH_Uptodate, &bh->b_state))
+ return;
+
+ /*
* make it consistent with folio_mark_uptodate
* pairs with smp_load_acquire in buffer_uptodate
*/
@@ -156,7 +167,7 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh)
* make it consistent with folio_test_uptodate
* pairs with smp_mb__before_atomic in set_buffer_uptodate
*/
- return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0;
+ return test_bit_acquire(BH_Uptodate, &bh->b_state);
}
#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
@@ -214,8 +225,6 @@ struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
-void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size,
- gfp_t gfp);
struct buffer_head *__bread_gfp(struct block_device *,
sector_t block, unsigned size, gfp_t gfp);
void invalidate_bh_lrus(void);
@@ -225,15 +234,16 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
void unlock_buffer(struct buffer_head *bh);
void __lock_buffer(struct buffer_head *bh);
-void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]);
int sync_dirty_buffer(struct buffer_head *bh);
int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
-int submit_bh(blk_opf_t, struct buffer_head *);
+void submit_bh(blk_opf_t, struct buffer_head *);
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize);
int bh_uptodate_or_lock(struct buffer_head *bh);
-int bh_submit_read(struct buffer_head *bh);
+int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
+void __bh_read_batch(int nr, struct buffer_head *bhs[],
+ blk_opf_t op_flags, bool force_lock);
extern int buffer_heads_over_limit;
@@ -340,12 +350,6 @@ sb_breadahead(struct super_block *sb, sector_t block)
__breadahead(sb->s_bdev, block, sb->s_blocksize);
}
-static inline void
-sb_breadahead_unmovable(struct super_block *sb, sector_t block)
-{
- __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
-}
-
static inline struct buffer_head *
sb_getblk(struct super_block *sb, sector_t block)
{
@@ -407,6 +411,41 @@ static inline struct buffer_head *__getblk(struct block_device *bdev,
return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
}
+static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
+{
+ if (!buffer_uptodate(bh) && trylock_buffer(bh)) {
+ if (!buffer_uptodate(bh))
+ __bh_read(bh, op_flags, false);
+ else
+ unlock_buffer(bh);
+ }
+}
+
+static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
+{
+ if (!bh_uptodate_or_lock(bh))
+ __bh_read(bh, op_flags, false);
+}
+
+/* Returns 1 if the buffer was already uptodate, 0 on success, and -EIO on error. */
+static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
+{
+ if (bh_uptodate_or_lock(bh))
+ return 1;
+ return __bh_read(bh, op_flags, true);
+}
+
+static inline void bh_read_batch(int nr, struct buffer_head *bhs[])
+{
+ __bh_read_batch(nr, bhs, 0, true);
+}
+
+static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[],
+ blk_opf_t op_flags)
+{
+ __bh_read_batch(nr, bhs, op_flags, false);
+}
+
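A hedged sketch of how a filesystem might use the new helpers in place of the removed ll_rw_block()/bh_submit_read() paths (example_read_block() is illustrative):

/* Synchronously read one buffer; skips the I/O if it is already uptodate. */
static int example_read_block(struct buffer_head *bh)
{
	int ret = bh_read(bh, 0);

	/* 1 means already uptodate, 0 means the read just completed. */
	return ret < 0 ? ret : 0;
}

A fire-and-forget readahead over an array of buffers would instead use bh_readahead_batch(nr, bhs, REQ_RAHEAD).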
/**
* __bread() - reads a specified block and returns the bh
* @bdev: the block_device to read from
diff --git a/include/linux/cache.h b/include/linux/cache.h
index d742c57eaee5..5da1bbd96154 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -85,4 +85,17 @@
#define cache_line_size() L1_CACHE_BYTES
#endif
+/*
+ * Helper to add padding within a struct to ensure fields fall into separate
+ * cachelines.
+ */
+#if defined(CONFIG_SMP)
+struct cacheline_padding {
+ char x[0];
+} ____cacheline_internodealigned_in_smp;
+#define CACHELINE_PADDING(name) struct cacheline_padding name
+#else
+#define CACHELINE_PADDING(name)
+#endif
+
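A minimal sketch of the intended use, keeping a frequently written counter off the cacheline of a read-mostly field (struct example_stats is hypothetical):

struct example_stats {
	unsigned long	hot_config;		/* read-mostly */
	CACHELINE_PADDING(_pad1_);		/* collapses to nothing on !SMP */
	atomic_long_t	events;			/* written on every event */
};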
#endif /* __LINUX_CACHE_H */
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index c3e50e537e39..982ba245eb41 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -147,6 +147,27 @@ static inline u32 can_get_static_ctrlmode(struct can_priv *priv)
return priv->ctrlmode & ~priv->ctrlmode_supported;
}
+static inline bool can_is_canxl_dev_mtu(unsigned int mtu)
+{
+ return (mtu >= CANXL_MIN_MTU && mtu <= CANXL_MAX_MTU);
+}
+
+/* drop skb if it does not contain a valid CAN frame for sending */
+static inline bool can_dev_dropped_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ struct can_priv *priv = netdev_priv(dev);
+
+ if (priv->ctrlmode & CAN_CTRLMODE_LISTENONLY) {
+ netdev_info_once(dev,
+ "interface in listen only mode, dropping skb\n");
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return true;
+ }
+
+ return can_dropped_invalid_skb(dev, skb);
+}
+
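A sketch of the expected call site in a driver's ndo_start_xmit (example_xmit() is illustrative); the helper frees the skb and accounts the drop itself:

static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (can_dev_dropped_skb(dev, skb))
		return NETDEV_TX_OK;

	/* ... hand the frame to the controller ... */
	return NETDEV_TX_OK;
}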
void can_setup(struct net_device *dev);
struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
index 5755ae5a4712..6a869682c120 100644
--- a/include/linux/can/platform/sja1000.h
+++ b/include/linux/can/platform/sja1000.h
@@ -14,7 +14,7 @@
#define OCR_MODE_TEST 0x01
#define OCR_MODE_NORMAL 0x02
#define OCR_MODE_CLOCK 0x03
-#define OCR_MODE_MASK 0x07
+#define OCR_MODE_MASK 0x03
#define OCR_TX0_INVERT 0x04
#define OCR_TX0_PULLDOWN 0x08
#define OCR_TX0_PULLUP 0x10
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index 182749e858b3..1abc25a8d144 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -20,7 +20,8 @@ void can_flush_echo_skb(struct net_device *dev);
int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
unsigned int idx, unsigned int frame_len);
struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx,
- u8 *len_ptr, unsigned int *frame_len_ptr);
+ unsigned int *len_ptr,
+ unsigned int *frame_len_ptr);
unsigned int __must_check can_get_echo_skb(struct net_device *dev,
unsigned int idx,
unsigned int *frame_len_ptr);
@@ -29,6 +30,9 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx,
struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf);
struct sk_buff *alloc_canfd_skb(struct net_device *dev,
struct canfd_frame **cfd);
+struct sk_buff *alloc_canxl_skb(struct net_device *dev,
+ struct canxl_frame **cxl,
+ unsigned int data_len);
struct sk_buff *alloc_can_err_skb(struct net_device *dev,
struct can_frame **cf);
bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb);
@@ -97,10 +101,59 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
return nskb;
}
+static inline bool can_is_can_skb(const struct sk_buff *skb)
+{
+ struct can_frame *cf = (struct can_frame *)skb->data;
+
+ /* the CAN specific type of skb is identified by its data length */
+ return (skb->len == CAN_MTU && cf->len <= CAN_MAX_DLEN);
+}
+
static inline bool can_is_canfd_skb(const struct sk_buff *skb)
{
+ struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+
/* the CAN specific type of skb is identified by its data length */
- return skb->len == CANFD_MTU;
+ return (skb->len == CANFD_MTU && cfd->len <= CANFD_MAX_DLEN);
+}
+
+static inline bool can_is_canxl_skb(const struct sk_buff *skb)
+{
+ const struct canxl_frame *cxl = (struct canxl_frame *)skb->data;
+
+ if (skb->len < CANXL_HDR_SIZE + CANXL_MIN_DLEN || skb->len > CANXL_MTU)
+ return false;
+
+ /* this also checks valid CAN XL data length boundaries */
+ if (skb->len != CANXL_HDR_SIZE + cxl->len)
+ return false;
+
+ return cxl->flags & CANXL_XLF;
+}
+
+/* get length element value from can[|fd|xl]_frame structure */
+static inline unsigned int can_skb_get_len_val(struct sk_buff *skb)
+{
+ const struct canxl_frame *cxl = (struct canxl_frame *)skb->data;
+ const struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+
+ if (can_is_canxl_skb(skb))
+ return cxl->len;
+
+ return cfd->len;
+}
+
+/* get needed data length inside CAN frame for all frame types (RTR aware) */
+static inline unsigned int can_skb_get_data_len(struct sk_buff *skb)
+{
+ unsigned int len = can_skb_get_len_val(skb);
+ const struct can_frame *cf = (struct can_frame *)skb->data;
+
+ /* RTR frames have an actual length of zero */
+ if (can_is_can_skb(skb) && cf->can_id & CAN_RTR_FLAG)
+ return 0;
+
+ return len;
}
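A hedged sketch of dispatching on the skb flavour with the new helpers (example_rx_dispatch() is illustrative):

static void example_rx_dispatch(struct sk_buff *skb)
{
	if (can_is_canxl_skb(skb))
		pr_debug("CAN XL frame, %u data bytes\n", can_skb_get_data_len(skb));
	else if (can_is_canfd_skb(skb))
		pr_debug("CAN FD frame, %u data bytes\n", can_skb_get_data_len(skb));
	else if (can_is_can_skb(skb))
		pr_debug("Classical CAN frame, %u data bytes\n", can_skb_get_data_len(skb));
}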
#endif /* !_CAN_SKB_H */
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index e7f2fb2fc207..99c1726be6ee 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -207,7 +207,6 @@ struct ceph_msg_data_cursor {
struct ceph_msg_data *data; /* current data item */
size_t resid; /* bytes not yet consumed */
- bool last_piece; /* current is last piece */
bool need_crc; /* crc update needed */
union {
#ifdef CONFIG_BLOCK
@@ -498,8 +497,7 @@ void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq);
void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
struct ceph_msg *msg, size_t length);
struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
- size_t *page_offset, size_t *length,
- bool *last_piece);
+ size_t *page_offset, size_t *length);
void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes);
u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset,
diff --git a/include/linux/cfi.h b/include/linux/cfi.h
index c6dfc1ed0626..5e134f4ce8b7 100644
--- a/include/linux/cfi.h
+++ b/include/linux/cfi.h
@@ -2,49 +2,38 @@
/*
* Clang Control Flow Integrity (CFI) support.
*
- * Copyright (C) 2021 Google LLC
+ * Copyright (C) 2022 Google LLC
*/
#ifndef _LINUX_CFI_H
#define _LINUX_CFI_H
-#ifdef CONFIG_CFI_CLANG
-typedef void (*cfi_check_fn)(uint64_t id, void *ptr, void *diag);
-
-/* Compiler-generated function in each module, and the kernel */
-extern void __cfi_check(uint64_t id, void *ptr, void *diag);
-
-/*
- * Force the compiler to generate a CFI jump table entry for a function
- * and store the jump table address to __cfi_jt_<function>.
- */
-#define __CFI_ADDRESSABLE(fn, __attr) \
- const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn
-
-#ifdef CONFIG_CFI_CLANG_SHADOW
-
-extern void cfi_module_add(struct module *mod, unsigned long base_addr);
-extern void cfi_module_remove(struct module *mod, unsigned long base_addr);
-
-#else
+#include <linux/bug.h>
+#include <linux/module.h>
-static inline void cfi_module_add(struct module *mod, unsigned long base_addr) {}
-static inline void cfi_module_remove(struct module *mod, unsigned long base_addr) {}
-
-#endif /* CONFIG_CFI_CLANG_SHADOW */
-
-#else /* !CONFIG_CFI_CLANG */
-
-#ifdef CONFIG_X86_KERNEL_IBT
-
-#define __CFI_ADDRESSABLE(fn, __attr) \
- const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn
+#ifdef CONFIG_CFI_CLANG
+enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr,
+ unsigned long *target, u32 type);
-#endif /* CONFIG_X86_KERNEL_IBT */
+static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs,
+ unsigned long addr)
+{
+ return report_cfi_failure(regs, addr, NULL, 0);
+}
+#ifdef CONFIG_ARCH_USES_CFI_TRAPS
+bool is_cfi_trap(unsigned long addr);
+#endif
#endif /* CONFIG_CFI_CLANG */
-#ifndef __CFI_ADDRESSABLE
-#define __CFI_ADDRESSABLE(fn, __attr)
-#endif
+#ifdef CONFIG_MODULES
+#ifdef CONFIG_ARCH_USES_CFI_TRAPS
+void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+ struct module *mod);
+#else
+static inline void module_cfi_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *mod) {}
+#endif /* CONFIG_ARCH_USES_CFI_TRAPS */
+#endif /* CONFIG_MODULES */
#endif /* _LINUX_CFI_H */
diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h
new file mode 100644
index 000000000000..6b8713675765
--- /dev/null
+++ b/include/linux/cfi_types.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Clang Control Flow Integrity (CFI) type definitions.
+ */
+#ifndef _LINUX_CFI_TYPES_H
+#define _LINUX_CFI_TYPES_H
+
+#ifdef __ASSEMBLY__
+#include <linux/linkage.h>
+
+#ifdef CONFIG_CFI_CLANG
+/*
+ * Use the __kcfi_typeid_<function> type identifier symbol to
+ * annotate indirectly called assembly functions. The compiler emits
+ * these symbols for all address-taken function declarations in C
+ * code.
+ */
+#ifndef __CFI_TYPE
+#define __CFI_TYPE(name) \
+ .4byte __kcfi_typeid_##name
+#endif
+
+#define SYM_TYPED_ENTRY(name, linkage, align...) \
+ linkage(name) ASM_NL \
+ align ASM_NL \
+ __CFI_TYPE(name) ASM_NL \
+ name:
+
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_TYPED_ENTRY(name, linkage, align)
+
+#else /* CONFIG_CFI_CLANG */
+
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_START(name, linkage, align)
+
+#endif /* CONFIG_CFI_CLANG */
+
+#ifndef SYM_TYPED_FUNC_START
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _LINUX_CFI_TYPES_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 4bcf56b3491c..8a0d5466c7be 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -126,11 +126,11 @@ enum {
CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */
CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */
- CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */
/* internal flags, do not use outside cgroup core proper */
__CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */
__CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */
+ __CFTYPE_ADDED = (1 << 18),
};
/*
@@ -384,7 +384,7 @@ struct cgroup {
/*
* The depth this cgroup is at. The root is at depth zero and each
* step down the hierarchy increments the level. This along with
- * ancestor_ids[] can determine whether a given cgroup is a
+ * ancestors[] can determine whether a given cgroup is a
* descendant of another without traversing the hierarchy.
*/
int level;
@@ -428,6 +428,9 @@ struct cgroup {
struct cgroup_file procs_file; /* handle for "cgroup.procs" */
struct cgroup_file events_file; /* handle for "cgroup.events" */
+ /* handles for "{cpu,memory,io,irq}.pressure" */
+ struct cgroup_file psi_files[NR_PSI_RESOURCES];
+
/*
* The bitmask of subsystems enabled on the child cgroups.
* ->subtree_control is the one configured through
@@ -504,8 +507,12 @@ struct cgroup {
/* Used to store internal freezer state */
struct cgroup_freezer_state freezer;
- /* ids of the ancestors at each level including self */
- u64 ancestor_ids[];
+#ifdef CONFIG_BPF_SYSCALL
+ struct bpf_local_storage __rcu *bpf_cgrp_storage;
+#endif
+
+ /* All ancestors including self */
+ struct cgroup *ancestors[];
};
/*
@@ -522,11 +529,15 @@ struct cgroup_root {
/* Unique id for this hierarchy. */
int hierarchy_id;
- /* The root cgroup. Root is destroyed on its release. */
+ /*
+ * The root cgroup. The containing cgroup_root will be destroyed on its
+	 * release. cgrp->ancestors[0] overflows into the following field, so
+	 * cgrp_ancestor_storage must immediately follow.
+ */
struct cgroup cgrp;
- /* for cgrp->ancestor_ids[0] */
- u64 cgrp_ancestor_id_storage;
+ /* must follow cgrp for cgrp->ancestors[0], see above */
+ struct cgroup *cgrp_ancestor_storage;
/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
atomic_t nr_cgrps;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed53bfe7c46c..3410aecffdb4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -68,6 +68,7 @@ struct css_task_iter {
struct list_head iters_node; /* css_set->task_iters */
};
+extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
@@ -106,6 +107,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup *cgroup_get_from_path(const char *path);
struct cgroup *cgroup_get_from_fd(int fd);
+struct cgroup *cgroup_v1v2_get_from_fd(int fd);
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
@@ -114,6 +116,7 @@ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);
+void cgroup_file_show(struct cgroup_file *cfile, bool show);
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
@@ -307,72 +310,25 @@ void css_task_iter_end(struct css_task_iter *it);
* Inline functions.
*/
+#ifdef CONFIG_DEBUG_CGROUP_REF
+void css_get(struct cgroup_subsys_state *css);
+void css_get_many(struct cgroup_subsys_state *css, unsigned int n);
+bool css_tryget(struct cgroup_subsys_state *css);
+bool css_tryget_online(struct cgroup_subsys_state *css);
+void css_put(struct cgroup_subsys_state *css);
+void css_put_many(struct cgroup_subsys_state *css, unsigned int n);
+#else
+#define CGROUP_REF_FN_ATTRS static inline
+#define CGROUP_REF_EXPORT(fn)
+#include <linux/cgroup_refcnt.h>
+#endif
+
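When CONFIG_DEBUG_CGROUP_REF is set only the declarations live here, so a .c file presumably generates out-of-line, exported definitions by defining the two macros before including the template header; a sketch (not the literal kernel/cgroup/cgroup.c text):

/* Sketch: emit out-of-line, exported css refcount helpers for debugging. */
#define CGROUP_REF_FN_ATTRS
#define CGROUP_REF_EXPORT(fn)	EXPORT_SYMBOL_GPL(fn);
#include <linux/cgroup_refcnt.h>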
static inline u64 cgroup_id(const struct cgroup *cgrp)
{
return cgrp->kn->id;
}
/**
- * css_get - obtain a reference on the specified css
- * @css: target css
- *
- * The caller must already have a reference.
- */
-static inline void css_get(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_NO_REF))
- percpu_ref_get(&css->refcnt);
-}
-
-/**
- * css_get_many - obtain references on the specified css
- * @css: target css
- * @n: number of references to get
- *
- * The caller must already have a reference.
- */
-static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
-{
- if (!(css->flags & CSS_NO_REF))
- percpu_ref_get_many(&css->refcnt, n);
-}
-
-/**
- * css_tryget - try to obtain a reference on the specified css
- * @css: target css
- *
- * Obtain a reference on @css unless it already has reached zero and is
- * being released. This function doesn't care whether @css is on or
- * offline. The caller naturally needs to ensure that @css is accessible
- * but doesn't have to be holding a reference on it - IOW, RCU protected
- * access is good enough for this function. Returns %true if a reference
- * count was successfully obtained; %false otherwise.
- */
-static inline bool css_tryget(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_NO_REF))
- return percpu_ref_tryget(&css->refcnt);
- return true;
-}
-
-/**
- * css_tryget_online - try to obtain a reference on the specified css if online
- * @css: target css
- *
- * Obtain a reference on @css if it's online. The caller naturally needs
- * to ensure that @css is accessible but doesn't have to be holding a
- * reference on it - IOW, RCU protected access is good enough for this
- * function. Returns %true if a reference count was successfully obtained;
- * %false otherwise.
- */
-static inline bool css_tryget_online(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_NO_REF))
- return percpu_ref_tryget_live(&css->refcnt);
- return true;
-}
-
-/**
* css_is_dying - test whether the specified css is dying
* @css: target css
*
@@ -392,31 +348,6 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
}
-/**
- * css_put - put a css reference
- * @css: target css
- *
- * Put a reference obtained via css_get() and css_tryget_online().
- */
-static inline void css_put(struct cgroup_subsys_state *css)
-{
- if (!(css->flags & CSS_NO_REF))
- percpu_ref_put(&css->refcnt);
-}
-
-/**
- * css_put_many - put css references
- * @css: target css
- * @n: number of references to put
- *
- * Put references obtained via css_get() and css_tryget_online().
- */
-static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
-{
- if (!(css->flags & CSS_NO_REF))
- percpu_ref_put_many(&css->refcnt, n);
-}
-
static inline void cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);
@@ -432,6 +363,18 @@ static inline void cgroup_put(struct cgroup *cgrp)
css_put(&cgrp->self);
}
+extern struct mutex cgroup_mutex;
+
+static inline void cgroup_lock(void)
+{
+ mutex_lock(&cgroup_mutex);
+}
+
+static inline void cgroup_unlock(void)
+{
+ mutex_unlock(&cgroup_mutex);
+}
+
/**
* task_css_set_check - obtain a task's css_set with extra access conditions
* @task: the task to obtain css_set for
@@ -446,7 +389,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
* as locks used during the cgroup_subsys::attach() methods.
*/
#ifdef CONFIG_PROVE_RCU
-extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
@@ -574,7 +516,7 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
{
if (cgrp->root != ancestor->root || cgrp->level < ancestor->level)
return false;
- return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor);
+ return cgrp->ancestors[ancestor->level] == ancestor;
}
/**
@@ -591,11 +533,9 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
int ancestor_level)
{
- if (cgrp->level < ancestor_level)
+ if (ancestor_level < 0 || ancestor_level > cgrp->level)
return NULL;
- while (cgrp && cgrp->level > ancestor_level)
- cgrp = cgroup_parent(cgrp);
- return cgrp;
+ return cgrp->ancestors[ancestor_level];
}
/**
@@ -672,11 +612,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
pr_cont_kernfs_path(cgrp->kn);
}
-static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
-{
- return cgrp->psi;
-}
-
bool cgroup_psi_enabled(void);
static inline void cgroup_init_kthreadd(void)
@@ -708,6 +643,8 @@ struct cgroup;
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
static inline void css_get(struct cgroup_subsys_state *css) {}
static inline void css_put(struct cgroup_subsys_state *css) {}
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
struct task_struct *t) { return 0; }
static inline int cgroupstats_build(struct cgroupstats *stats,
@@ -734,11 +671,6 @@ static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
return NULL;
}
-static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
-{
- return NULL;
-}
-
static inline bool cgroup_psi_enabled(void)
{
return false;
@@ -752,11 +684,6 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
{}
-
-static inline struct cgroup *cgroup_get_from_id(u64 id)
-{
- return NULL;
-}
#endif /* !CONFIG_CGROUPS */
#ifdef CONFIG_CGROUPS
diff --git a/include/linux/cgroup_refcnt.h b/include/linux/cgroup_refcnt.h
new file mode 100644
index 000000000000..2eea0a69ecfc
--- /dev/null
+++ b/include/linux/cgroup_refcnt.h
@@ -0,0 +1,96 @@
+/**
+ * css_get - obtain a reference on the specified css
+ * @css: target css
+ *
+ * The caller must already have a reference.
+ */
+CGROUP_REF_FN_ATTRS
+void css_get(struct cgroup_subsys_state *css)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_get(&css->refcnt);
+}
+CGROUP_REF_EXPORT(css_get)
+
+/**
+ * css_get_many - obtain references on the specified css
+ * @css: target css
+ * @n: number of references to get
+ *
+ * The caller must already have a reference.
+ */
+CGROUP_REF_FN_ATTRS
+void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_get_many(&css->refcnt, n);
+}
+CGROUP_REF_EXPORT(css_get_many)
+
+/**
+ * css_tryget - try to obtain a reference on the specified css
+ * @css: target css
+ *
+ * Obtain a reference on @css unless it already has reached zero and is
+ * being released. This function doesn't care whether @css is online or
+ * offline. The caller naturally needs to ensure that @css is accessible
+ * but doesn't have to be holding a reference on it - IOW, RCU protected
+ * access is good enough for this function. Returns %true if a reference
+ * count was successfully obtained; %false otherwise.
+ */
+CGROUP_REF_FN_ATTRS
+bool css_tryget(struct cgroup_subsys_state *css)
+{
+ if (!(css->flags & CSS_NO_REF))
+ return percpu_ref_tryget(&css->refcnt);
+ return true;
+}
+CGROUP_REF_EXPORT(css_tryget)
+
+/**
+ * css_tryget_online - try to obtain a reference on the specified css if online
+ * @css: target css
+ *
+ * Obtain a reference on @css if it's online. The caller naturally needs
+ * to ensure that @css is accessible but doesn't have to be holding a
+ * reference on it - IOW, RCU protected access is good enough for this
+ * function. Returns %true if a reference count was successfully obtained;
+ * %false otherwise.
+ */
+CGROUP_REF_FN_ATTRS
+bool css_tryget_online(struct cgroup_subsys_state *css)
+{
+ if (!(css->flags & CSS_NO_REF))
+ return percpu_ref_tryget_live(&css->refcnt);
+ return true;
+}
+CGROUP_REF_EXPORT(css_tryget_online)
+
+/**
+ * css_put - put a css reference
+ * @css: target css
+ *
+ * Put a reference obtained via css_get() and css_tryget_online().
+ */
+CGROUP_REF_FN_ATTRS
+void css_put(struct cgroup_subsys_state *css)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_put(&css->refcnt);
+}
+CGROUP_REF_EXPORT(css_put)
+
+/**
+ * css_put_many - put css references
+ * @css: target css
+ * @n: number of references to put
+ *
+ * Put references obtained via css_get() and css_tryget_online().
+ */
+CGROUP_REF_FN_ATTRS
+void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_put_many(&css->refcnt, n);
+}
+CGROUP_REF_EXPORT(css_put_many)
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 1615010aa0ec..842e72a5348f 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -42,6 +42,9 @@ struct dentry;
* struct clk_rate_request - Structure encoding the clk constraints that
* a clock user might require.
*
+ * Should be initialized by calling clk_hw_init_rate_request().
+ *
+ * @core: Pointer to the struct clk_core affected by this request
* @rate: Requested clock rate. This field will be adjusted by
* clock drivers according to hardware capabilities.
* @min_rate: Minimum rate imposed by clk users.
@@ -53,6 +56,7 @@ struct dentry;
*
*/
struct clk_rate_request {
+ struct clk_core *core;
unsigned long rate;
unsigned long min_rate;
unsigned long max_rate;
@@ -60,6 +64,15 @@ struct clk_rate_request {
struct clk_hw *best_parent_hw;
};
+void clk_hw_init_rate_request(const struct clk_hw *hw,
+ struct clk_rate_request *req,
+ unsigned long rate);
+void clk_hw_forward_rate_request(const struct clk_hw *core,
+ const struct clk_rate_request *old_req,
+ const struct clk_hw *parent,
+ struct clk_rate_request *req,
+ unsigned long parent_rate);
+
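A hedged sketch of a determine_rate implementation using the new helpers to consult the parent clock (example_determine_rate() and its pass-through behaviour are illustrative):

static int example_determine_rate(struct clk_hw *hw,
				  struct clk_rate_request *req)
{
	struct clk_hw *parent = clk_hw_get_parent(hw);
	struct clk_rate_request parent_req;
	int ret;

	if (!parent)
		return -EINVAL;

	/* Seed the parent's request from the constraints already in *req. */
	clk_hw_forward_rate_request(hw, req, parent, &parent_req, req->rate);

	ret = __clk_determine_rate(parent, &parent_req);
	if (ret)
		return ret;

	/* Pass-through clock for illustration: adopt the parent's rate. */
	req->best_parent_hw = parent;
	req->best_parent_rate = parent_req.rate;
	req->rate = parent_req.rate;
	return 0;
}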
/**
 * struct clk_duty - Structure encoding the duty cycle ratio of a clock
*
@@ -118,8 +131,9 @@ struct clk_duty {
*
* @recalc_rate Recalculate the rate of this clock, by querying hardware. The
* parent rate is an input parameter. It is up to the caller to
- * ensure that the prepare_mutex is held across this call.
- * Returns the calculated rate. Optional, but recommended - if
+ * ensure that the prepare_mutex is held across this call. If the
+ * driver cannot figure out a rate for this clock, it must return
+ * 0. Returns the calculated rate. Optional, but recommended - if
* this op is not set then clock rate will be initialized to 0.
*
* @round_rate: Given a target rate as input, returns the closest rate actually
@@ -350,7 +364,7 @@ struct clk_hw *__clk_hw_register_fixed_rate(struct device *dev,
const char *parent_name, const struct clk_hw *parent_hw,
const struct clk_parent_data *parent_data, unsigned long flags,
unsigned long fixed_rate, unsigned long fixed_accuracy,
- unsigned long clk_fixed_flags);
+ unsigned long clk_fixed_flags, bool devm);
struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
const char *parent_name, unsigned long flags,
unsigned long fixed_rate);
@@ -365,7 +379,20 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
*/
#define clk_hw_register_fixed_rate(dev, name, parent_name, flags, fixed_rate) \
__clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \
- NULL, (flags), (fixed_rate), 0, 0)
+ NULL, (flags), (fixed_rate), 0, 0, false)
+
+/**
+ * devm_clk_hw_register_fixed_rate - register fixed-rate clock with the clock
+ * framework
+ * @dev: device that is registering this clock
+ * @name: name of this clock
+ * @parent_name: name of clock's parent
+ * @flags: framework-specific flags
+ * @fixed_rate: non-adjustable clock rate
+ */
+#define devm_clk_hw_register_fixed_rate(dev, name, parent_name, flags, fixed_rate) \
+ __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \
+ NULL, (flags), (fixed_rate), 0, 0, true)
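A sketch of the devres-managed variant in a probe path (driver and clock names are illustrative); the clock is unregistered automatically when the device is unbound:

static int example_probe(struct platform_device *pdev)
{
	struct clk_hw *hw;

	/* 24 MHz fixed clock, released automatically on driver detach. */
	hw = devm_clk_hw_register_fixed_rate(&pdev->dev, "example-osc",
					     NULL, 0, 24000000);
	return PTR_ERR_OR_ZERO(hw);
}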
/**
* clk_hw_register_fixed_rate_parent_hw - register fixed-rate clock with
* the clock framework
@@ -378,7 +405,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
#define clk_hw_register_fixed_rate_parent_hw(dev, name, parent_hw, flags, \
fixed_rate) \
__clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw), \
- NULL, (flags), (fixed_rate), 0, 0)
+ NULL, (flags), (fixed_rate), 0, 0, false)
/**
* clk_hw_register_fixed_rate_parent_data - register fixed-rate clock with
* the clock framework
@@ -392,7 +419,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
fixed_rate) \
__clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \
(parent_data), (flags), (fixed_rate), 0, \
- 0)
+ 0, false)
/**
* clk_hw_register_fixed_rate_with_accuracy - register fixed-rate clock with
* the clock framework
@@ -408,7 +435,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
fixed_accuracy) \
__clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), \
NULL, NULL, (flags), (fixed_rate), \
- (fixed_accuracy), 0)
+ (fixed_accuracy), 0, false)
/**
* clk_hw_register_fixed_rate_with_accuracy_parent_hw - register fixed-rate
* clock with the clock framework
@@ -423,7 +450,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
parent_hw, flags, fixed_rate, fixed_accuracy) \
 	__clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw), \
 				     NULL, (flags), (fixed_rate), \
- (fixed_accuracy), 0)
+ (fixed_accuracy), 0, false)
/**
* clk_hw_register_fixed_rate_with_accuracy_parent_data - register fixed-rate
* clock with the clock framework
@@ -438,7 +465,21 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
parent_data, flags, fixed_rate, fixed_accuracy) \
__clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \
(parent_data), NULL, (flags), \
- (fixed_rate), (fixed_accuracy), 0)
+ (fixed_rate), (fixed_accuracy), 0, false)
+/**
+ * clk_hw_register_fixed_rate_parent_accuracy - register fixed-rate clock with
+ * the clock framework
+ * @dev: device that is registering this clock
+ * @name: name of this clock
+ * @parent_data: parent clk data
+ * @flags: framework-specific flags
+ * @fixed_rate: non-adjustable clock rate
+ */
+#define clk_hw_register_fixed_rate_parent_accuracy(dev, name, parent_data, \
+ flags, fixed_rate) \
+ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \
+ (parent_data), (flags), (fixed_rate), 0, \
+ CLK_FIXED_RATE_PARENT_ACCURACY, false)
void clk_unregister_fixed_rate(struct clk *clk);
void clk_hw_unregister_fixed_rate(struct clk_hw *hw);
@@ -957,6 +998,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name,
(parent_names), NULL, NULL, (flags), (reg), \
(shift), (mask), (clk_mux_flags), (table), \
(lock))
+#define clk_hw_register_mux_table_parent_data(dev, name, parent_data, \
+ num_parents, flags, reg, shift, mask, \
+ clk_mux_flags, table, lock) \
+ __clk_hw_register_mux((dev), NULL, (name), (num_parents), \
+ NULL, NULL, (parent_data), (flags), (reg), \
+ (shift), (mask), (clk_mux_flags), (table), \
+ (lock))
#define clk_hw_register_mux(dev, name, parent_names, num_parents, flags, reg, \
shift, width, clk_mux_flags, lock) \
__clk_hw_register_mux((dev), NULL, (name), (num_parents), \
@@ -974,6 +1022,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name,
__clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, NULL, \
(parent_data), (flags), (reg), (shift), \
BIT((width)) - 1, (clk_mux_flags), NULL, (lock))
+#define clk_hw_register_mux_parent_data_table(dev, name, parent_data, \
+ num_parents, flags, reg, shift, \
+ width, clk_mux_flags, table, \
+ lock) \
+ __clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, NULL, \
+ (parent_data), (flags), (reg), (shift), \
+ BIT((width)) - 1, (clk_mux_flags), table, (lock))
#define devm_clk_hw_register_mux(dev, name, parent_names, num_parents, flags, reg, \
shift, width, clk_mux_flags, lock) \
__devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), \
@@ -987,6 +1042,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name,
(parent_hws), NULL, (flags), (reg), \
(shift), BIT((width)) - 1, \
(clk_mux_flags), NULL, (lock))
+#define devm_clk_hw_register_mux_parent_data_table(dev, name, parent_data, \
+ num_parents, flags, reg, shift, \
+ width, clk_mux_flags, table, \
+ lock) \
+ __devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, \
+ NULL, (parent_data), (flags), (reg), (shift), \
+ BIT((width)) - 1, (clk_mux_flags), table, (lock))
int clk_mux_val_to_index(struct clk_hw *hw, const u32 *table, unsigned int flags,
unsigned int val);
@@ -1255,6 +1317,8 @@ int clk_mux_determine_rate_flags(struct clk_hw *hw,
struct clk_rate_request *req,
unsigned long flags);
void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent);
+void clk_hw_get_rate_range(struct clk_hw *hw, unsigned long *min_rate,
+ unsigned long *max_rate);
void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate,
unsigned long max_rate);
@@ -1454,7 +1518,7 @@ int devm_of_clk_add_hw_provider(struct device *dev,
void *data),
void *data);
void of_clk_del_provider(struct device_node *np);
-void devm_of_clk_del_provider(struct device *dev);
+
struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec,
void *data);
struct clk_hw *of_clk_hw_simple_get(struct of_phandle_args *clkspec,
@@ -1491,7 +1555,7 @@ static inline int devm_of_clk_add_hw_provider(struct device *dev,
return 0;
}
static inline void of_clk_del_provider(struct device_node *np) {}
-static inline void devm_of_clk_del_provider(struct device *dev) {}
+
static inline struct clk *of_clk_src_simple_get(
struct of_phandle_args *clkspec, void *data)
{
diff --git a/include/linux/clk.h b/include/linux/clk.h
index c13061cabdfc..1ef013324237 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -799,7 +799,7 @@ int clk_set_rate_exclusive(struct clk *clk, unsigned long rate);
*
* Returns true if @parent is a possible parent for @clk, false otherwise.
*/
-bool clk_has_parent(struct clk *clk, struct clk *parent);
+bool clk_has_parent(const struct clk *clk, const struct clk *parent);
/**
* clk_set_rate_range - set a rate range for a clock source
diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index 3484309b59bf..7af499bdbecb 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -12,6 +12,8 @@
#ifndef AT91_PMC_H
#define AT91_PMC_H
+#include <linux/bits.h>
+
#define AT91_PMC_V1 (1) /* PMC version 1 */
#define AT91_PMC_V2 (2) /* PMC version 2 [SAM9X60] */
@@ -45,8 +47,8 @@
#define AT91_PMC_PCSR 0x18 /* Peripheral Clock Status Register */
#define AT91_PMC_PLL_ACR 0x18 /* PLL Analog Control Register [for SAM9X60] */
-#define AT91_PMC_PLL_ACR_DEFAULT_UPLL 0x12020010UL /* Default PLL ACR value for UPLL */
-#define AT91_PMC_PLL_ACR_DEFAULT_PLLA 0x00020010UL /* Default PLL ACR value for PLLA */
+#define AT91_PMC_PLL_ACR_DEFAULT_UPLL UL(0x12020010) /* Default PLL ACR value for UPLL */
+#define AT91_PMC_PLL_ACR_DEFAULT_PLLA UL(0x00020010) /* Default PLL ACR value for PLLA */
#define AT91_PMC_PLL_ACR_UTMIVR (1 << 12) /* UPLL Voltage regulator Control */
#define AT91_PMC_PLL_ACR_UTMIBG (1 << 13) /* UPLL Bandgap Control */
diff --git a/include/linux/clk/davinci.h b/include/linux/clk/davinci.h
index 8a7b5cd7eac0..f6ebab6228c2 100644
--- a/include/linux/clk/davinci.h
+++ b/include/linux/clk/davinci.h
@@ -28,13 +28,5 @@ int dm365_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgch
int dm365_pll2_init(struct device *dev, void __iomem *base, struct regmap *cfgchip);
int dm365_psc_init(struct device *dev, void __iomem *base);
#endif
-#ifdef CONFIG_ARCH_DAVINCI_DM644x
-int dm644x_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgchip);
-int dm644x_psc_init(struct device *dev, void __iomem *base);
-#endif
-#ifdef CONFIG_ARCH_DAVINCI_DM646x
-int dm646x_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgchip);
-int dm646x_psc_init(struct device *dev, void __iomem *base);
-#endif
#endif /* __LINUX_CLK_DAVINCI_PLL_H___ */
diff --git a/include/linux/clk/spear.h b/include/linux/clk/spear.h
index a64d034ceddd..eaf95ca656f8 100644
--- a/include/linux/clk/spear.h
+++ b/include/linux/clk/spear.h
@@ -8,6 +8,20 @@
#ifndef __LINUX_CLK_SPEAR_H
#define __LINUX_CLK_SPEAR_H
+#ifdef CONFIG_ARCH_SPEAR3XX
+void __init spear3xx_clk_init(void __iomem *misc_base,
+ void __iomem *soc_config_base);
+#else
+static inline void __init spear3xx_clk_init(void __iomem *misc_base,
+ void __iomem *soc_config_base) {}
+#endif
+
+#ifdef CONFIG_ARCH_SPEAR6XX
+void __init spear6xx_clk_init(void __iomem *misc_base);
+#else
+static inline void __init spear6xx_clk_init(void __iomem *misc_base) {}
+#endif
+
#ifdef CONFIG_MACH_SPEAR1310
void __init spear1310_clk_init(void __iomem *misc_base, void __iomem *ras_base);
#else
diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h
index 8a8423eb8e9a..45570bc21a43 100644
--- a/include/linux/clkdev.h
+++ b/include/linux/clkdev.h
@@ -46,6 +46,4 @@ int clk_hw_register_clkdev(struct clk_hw *, const char *, const char *);
int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw,
const char *con_id, const char *dev_id);
-void devm_clk_release_clkdev(struct device *dev, const char *con_id,
- const char *dev_id);
#endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 594357881b0b..44b1736c95b5 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -126,11 +126,9 @@ struct compat_tms {
#define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW)
-#ifndef compat_sigset_t
typedef struct {
compat_sigset_word sig[_COMPAT_NSIG_WORDS];
} compat_sigset_t;
-#endif
int set_compat_user_sigmask(const compat_sigset_t __user *umask,
size_t sigsetsize);
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index c84fec767445..6cfd6902bd5b 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -51,6 +51,29 @@
#define __no_sanitize_undefined
#endif
+#if __has_feature(memory_sanitizer)
+#define __SANITIZE_MEMORY__
+/*
+ * Unlike other sanitizers, KMSAN still inserts code into functions marked with
+ * no_sanitize("kernel-memory"). Using disable_sanitizer_instrumentation
+ * provides behavior consistent with other __no_sanitize_ attributes,
+ * guaranteeing that __no_sanitize_memory functions remain uninstrumented.
+ */
+#define __no_sanitize_memory __disable_sanitizer_instrumentation
+
+/*
+ * The __no_kmsan_checks attribute ensures that a function does not produce
+ * false positive reports by:
+ * - initializing all local variables and memory stores in this function;
+ * - skipping all shadow checks;
+ * - passing initialized arguments to this function's callees.
+ */
+#define __no_kmsan_checks __attribute__((no_sanitize("kernel-memory")))
+#else
+#define __no_sanitize_memory
+#define __no_kmsan_checks
+#endif
+
/*
* Support for __has_feature(coverage_sanitizer) was added in Clang 13 together
* with no_sanitize("coverage"). Prior versions of Clang support coverage
@@ -66,17 +89,9 @@
# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#endif
-#define __nocfi __attribute__((__no_sanitize__("cfi")))
-#define __cficanonical __attribute__((__cfi_canonical_jump_table__))
-
-#if defined(CONFIG_CFI_CLANG)
-/*
- * With CONFIG_CFI_CLANG, the compiler replaces function address
- * references with the address of the function's CFI jump table
- * entry. The function_nocfi macro always returns the address of the
- * actual function instead.
- */
-#define function_nocfi(x) __builtin_function_start(x)
+#if __has_feature(kcfi)
+/* Disable CFI checking inside a function. */
+#define __nocfi __attribute__((__no_sanitize__("kcfi")))
#endif
/*
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 9b157b71036f..7af9e34ec261 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -82,26 +82,21 @@
#define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#endif
-#if __has_attribute(__no_sanitize_address__)
-#define __no_sanitize_address __attribute__((no_sanitize_address))
-#else
-#define __no_sanitize_address
-#endif
+#define __no_sanitize_address __attribute__((__no_sanitize_address__))
-#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__)
-#define __no_sanitize_thread __attribute__((no_sanitize_thread))
+#if defined(__SANITIZE_THREAD__)
+#define __no_sanitize_thread __attribute__((__no_sanitize_thread__))
#else
#define __no_sanitize_thread
#endif
-#if __has_attribute(__no_sanitize_undefined__)
-#define __no_sanitize_undefined __attribute__((no_sanitize_undefined))
-#else
-#define __no_sanitize_undefined
-#endif
+#define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__))
+/*
+ * Only supported since gcc 12.
+ */
#if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__)
-#define __no_sanitize_coverage __attribute__((no_sanitize_coverage))
+#define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__))
#else
#define __no_sanitize_coverage
#endif
@@ -115,6 +110,12 @@
#endif
/*
+ * GCC does not support KMSAN.
+ */
+#define __no_sanitize_memory
+#define __no_kmsan_checks
+
+/*
* Turn individual warnings and errors on and off locally, depending
* on version.
*/
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 01ce94b58b42..947a60b801db 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -203,16 +203,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
__v; \
})
-/*
- * With CONFIG_CFI_CLANG, the compiler replaces function addresses in
- * instrumented C code with jump table addresses. Architectures that
- * support CFI can define this macro to return the actual function address
- * when needed.
- */
-#ifndef function_nocfi
-#define function_nocfi(x) (x)
-#endif
-
#endif /* __KERNEL__ */
/*
@@ -221,9 +211,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
* otherwise, or eliminated entirely due to lack of references that are
* visible to the compiler.
*/
-#define __ADDRESSABLE(sym) \
- static void * __section(".discard.addressable") __used \
+#define ___ADDRESSABLE(sym, __attrs) \
+ static void * __used __attrs \
__UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym;
+#define __ADDRESSABLE(sym) \
+ ___ADDRESSABLE(sym, __section(".discard.addressable"))
/**
* offset_to_ptr - convert a relative memory offset to an absolute pointer
@@ -240,6 +232,13 @@ static inline void *offset_to_ptr(const int *off)
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
/*
+ * Whether 'type' is a signed type or an unsigned type. Supports scalar types,
+ * bool and also pointer types.
+ */
+#define is_signed_type(type) (((type)(-1)) < (__force type)1)
+#define is_unsigned_type(type) (!is_signed_type(type))
+
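For illustration, a few compile-time checks these macros satisfy (pointer types compare as unsigned here):

static inline void example_signedness_checks(void)
{
	BUILD_BUG_ON(!is_signed_type(int));
	BUILD_BUG_ON(!is_unsigned_type(unsigned long));
	BUILD_BUG_ON(!is_unsigned_type(char *));	/* pointers count as unsigned */
	BUILD_BUG_ON(is_signed_type(bool));		/* (bool)-1 == 1, not negative */
}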
+/*
* This is needed in functions which generate the stack canary, see
* arch/x86/kernel/smpboot.c::start_secondary() for an example.
*/
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 445e80517cab..898b3458b24a 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -35,7 +35,8 @@
/*
* Note: do not use this directly. Instead, use __alloc_size() since it is conditionally
- * available and includes other attributes.
+ * available and includes other attributes. For GCC < 9.1, __alloc_size__ gets undefined
+ * in compiler-gcc.h, as the attribute misbehaves there.
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute
* clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size
@@ -371,4 +372,11 @@
*/
#define __weak __attribute__((__weak__))
+/*
+ * Used by functions that use '__builtin_return_address'. These functions
+ * must not be split or inlined, otherwise '__builtin_return_address' may
+ * return an unexpected address.
+ */
+#define __fix_address noinline __noclone
+
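A sketch of the kind of function the attribute is meant for (example_caller_ip() is illustrative): inlining or splitting it would change what __builtin_return_address(0) observes.

static __fix_address unsigned long example_caller_ip(void)
{
	/* Must stay a real call frame so this is the caller's address. */
	return (unsigned long)__builtin_return_address(0);
}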
#endif /* __LINUX_COMPILER_ATTRIBUTES_H */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 4f2a819fd60a..7c1afe0f4129 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -4,8 +4,12 @@
#ifndef __ASSEMBLY__
+/*
+ * Skipped when running bindgen due to a libclang issue;
+ * see https://github.com/rust-lang/rust-bindgen/issues/2244.
+ */
#if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \
- __has_attribute(btf_type_tag)
+ __has_attribute(btf_type_tag) && !defined(__BINDGEN__)
# define BTF_TYPE_TAG(value) __attribute__((btf_type_tag(#value)))
#else
# define BTF_TYPE_TAG(value) /* nothing */
@@ -45,7 +49,8 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
# endif
# define __iomem
# define __percpu BTF_TYPE_TAG(percpu)
-# define __rcu
+# define __rcu BTF_TYPE_TAG(rcu)
+
# define __chk_user_ptr(x) (void)0
# define __chk_io_ptr(x) (void)0
/* context/locking */
@@ -229,7 +234,8 @@ struct ftrace_likely_data {
/* Section for code which can't be instrumented at all */
#define noinstr \
noinline notrace __attribute((__section__(".noinstr.text"))) \
- __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage
+ __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
+ __no_sanitize_memory
#endif /* __KERNEL__ */
@@ -265,20 +271,18 @@ struct ftrace_likely_data {
# define __nocfi
#endif
-#ifndef __cficanonical
-# define __cficanonical
-#endif
-
/*
* Any place that could be marked with the "alloc_size" attribute is also
- * a place to be marked with the "malloc" attribute. Do this as part of the
- * __alloc_size macro to avoid redundant attributes and to avoid missing a
- * __malloc marking.
+ * a place to be marked with the "malloc" attribute, except those that may
+ * be performing a _reallocation_, as that may alias the existing pointer.
+ * For these, use __realloc_size().
*/
#ifdef __alloc_size__
# define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc
+# define __realloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__)
#else
# define __alloc_size(x, ...) __malloc
+# define __realloc_size(x, ...)
#endif
#ifndef asm_volatile_goto
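
To illustrate the distinction drawn above: an allocator wrapper gets __alloc_size() (which also applies __malloc), while a reallocator gets __realloc_size() because its return value may alias the pointer passed in. The prototypes below are hypothetical:

#include <linux/types.h>

void *my_buf_alloc(size_t size) __alloc_size(1);
void *my_buf_grow(void *buf, size_t new_size) __realloc_size(2);
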
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 51d9ab079629..62b32b19e0a8 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -103,6 +103,7 @@ extern void wait_for_completion(struct completion *);
extern void wait_for_completion_io(struct completion *);
extern int wait_for_completion_interruptible(struct completion *x);
extern int wait_for_completion_killable(struct completion *x);
+extern int wait_for_completion_state(struct completion *x, unsigned int state);
extern unsigned long wait_for_completion_timeout(struct completion *x,
unsigned long timeout);
extern unsigned long wait_for_completion_io_timeout(struct completion *x,
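
wait_for_completion_state() lets the caller pick the sleeping state explicitly; a sketch of an interruptible, freezable wait, assuming the TASK_INTERRUPTIBLE and TASK_FREEZABLE flags from <linux/sched.h>:

#include <linux/completion.h>
#include <linux/sched.h>

static int wait_for_device_ready(struct completion *done)
{
	/* Returns 0 once complete()d, or -ERESTARTSYS if a signal arrives first. */
	return wait_for_completion_state(done, TASK_INTERRUPTIBLE | TASK_FREEZABLE);
}
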
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 97cfd13bae51..2606711adb18 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -204,8 +204,6 @@ static struct configfs_bin_attribute _pfx##attr_##_name = { \
 * group children. default_groups may coexist alongside make_group() or
* make_item(), but if the group wishes to have only default_groups
* children (disallowing mkdir(2)), it need not provide either function.
- * If the group has commit(), it supports pending and committed (active)
- * items.
*/
struct configfs_item_operations {
void (*release)(struct config_item *);
@@ -216,7 +214,6 @@ struct configfs_item_operations {
struct configfs_group_operations {
struct config_item *(*make_item)(struct config_group *group, const char *name);
struct config_group *(*make_group)(struct config_group *group, const char *name);
- int (*commit_item)(struct config_item *item);
void (*disconnect_notify)(struct config_group *group, struct config_item *item);
void (*drop_item)(struct config_group *group, struct config_item *item);
};
diff --git a/include/linux/console.h b/include/linux/console.h
index 8c1686e2c233..9cea254b34b8 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -15,6 +15,7 @@
#define _LINUX_CONSOLE_H_ 1
#include <linux/atomic.h>
+#include <linux/rculist.h>
#include <linux/types.h>
struct vc_data;
@@ -154,14 +155,132 @@ struct console {
u64 seq;
unsigned long dropped;
void *data;
- struct console *next;
+ struct hlist_node node;
};
+#ifdef CONFIG_LOCKDEP
+extern void lockdep_assert_console_list_lock_held(void);
+#else
+static inline void lockdep_assert_console_list_lock_held(void)
+{
+}
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern bool console_srcu_read_lock_is_held(void);
+#else
+static inline bool console_srcu_read_lock_is_held(void)
+{
+ return 1;
+}
+#endif
+
+extern int console_srcu_read_lock(void);
+extern void console_srcu_read_unlock(int cookie);
+
+extern void console_list_lock(void) __acquires(console_mutex);
+extern void console_list_unlock(void) __releases(console_mutex);
+
+extern struct hlist_head console_list;
+
+/**
+ * console_srcu_read_flags - Locklessly read the console flags
+ * @con: struct console pointer of console to read flags from
+ *
+ * This function provides the necessary READ_ONCE() and data_race()
+ * notation for locklessly reading the console flags. The READ_ONCE()
+ * in this function matches the WRITE_ONCE() when @flags are modified
+ * for registered consoles with console_srcu_write_flags().
+ *
+ * Only use this function to read console flags when locklessly
+ * iterating the console list via srcu.
+ *
+ * Context: Any context.
+ */
+static inline short console_srcu_read_flags(const struct console *con)
+{
+ WARN_ON_ONCE(!console_srcu_read_lock_is_held());
+
+ /*
+ * Locklessly reading console->flags provides a consistent
+ * read value because there is at most one CPU modifying
+ * console->flags and that CPU is using only read-modify-write
+ * operations to do so.
+ */
+ return data_race(READ_ONCE(con->flags));
+}
+
+/**
+ * console_srcu_write_flags - Write flags for a registered console
+ * @con: struct console pointer of console to write flags to
+ * @flags: new flags value to write
+ *
+ * Only use this function to write flags for registered consoles. It
+ * requires holding the console_list_lock.
+ *
+ * Context: Any context.
+ */
+static inline void console_srcu_write_flags(struct console *con, short flags)
+{
+ lockdep_assert_console_list_lock_held();
+
+ /* This matches the READ_ONCE() in console_srcu_read_flags(). */
+ WRITE_ONCE(con->flags, flags);
+}
+
+/* Variant of console_is_registered() when the console_list_lock is held. */
+static inline bool console_is_registered_locked(const struct console *con)
+{
+ lockdep_assert_console_list_lock_held();
+ return !hlist_unhashed(&con->node);
+}
+
/*
- * for_each_console() allows you to iterate on each console
+ * console_is_registered - Check if the console is registered
+ * @con: struct console pointer of console to check
+ *
+ * Context: Process context. May sleep while acquiring console list lock.
+ * Return: true if the console is in the console list, otherwise false.
+ *
+ * If false is returned for a console that was previously registered, it
+ * can be assumed that the console's unregistration is fully completed,
+ * including the exit() callback after console list removal.
+ */
+static inline bool console_is_registered(const struct console *con)
+{
+ bool ret;
+
+ console_list_lock();
+ ret = console_is_registered_locked(con);
+ console_list_unlock();
+ return ret;
+}
+
+/**
+ * for_each_console_srcu() - Iterator over registered consoles
+ * @con: struct console pointer used as loop cursor
+ *
+ * Although SRCU guarantees the console list will be consistent, the
+ * struct console fields may be updated by other CPUs while iterating.
+ *
+ * Requires console_srcu_read_lock to be held. Can be invoked from
+ * any context.
+ */
+#define for_each_console_srcu(con) \
+ hlist_for_each_entry_srcu(con, &console_list, node, \
+ console_srcu_read_lock_is_held())
+
+/**
+ * for_each_console() - Iterator over registered consoles
+ * @con: struct console pointer used as loop cursor
+ *
+ * The console list and the console->flags are immutable while iterating.
+ *
+ * Requires console_list_lock to be held.
*/
-#define for_each_console(con) \
- for (con = console_drivers; con != NULL; con = con->next)
+#define for_each_console(con) \
+ lockdep_assert_console_list_lock_held(); \
+ hlist_for_each_entry(con, &console_list, node)
extern int console_set_on_cmdline;
extern struct console *early_console;
@@ -172,9 +291,9 @@ enum con_flush_mode {
};
extern int add_preferred_console(char *name, int idx, char *options);
+extern void console_force_preferred_locked(struct console *con);
extern void register_console(struct console *);
extern int unregister_console(struct console *);
-extern struct console *console_drivers;
extern void console_lock(void);
extern int console_trylock(void);
extern void console_unlock(void);
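
Tying the new console-list API together, a lockless reader brackets the walk with console_srcu_read_lock()/console_srcu_read_unlock() and reads flags through console_srcu_read_flags(); a minimal sketch:

#include <linux/console.h>

static bool any_console_enabled(void)
{
	struct console *con;
	bool enabled = false;
	int cookie;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		if (console_srcu_read_flags(con) & CON_ENABLED) {
			enabled = true;
			break;
		}
	}
	console_srcu_read_unlock(cookie);

	return enabled;
}
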
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 08a1d3e7e46d..d3eba4360150 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -18,10 +18,10 @@ struct core_vma_metadata {
struct coredump_params {
const kernel_siginfo_t *siginfo;
- struct pt_regs *regs;
struct file *file;
unsigned long limit;
unsigned long mm_flags;
+ int cpu;
loff_t written;
loff_t pos;
loff_t to_skip;
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 9f445f09fcfe..1554021231f9 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -372,6 +372,29 @@ static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa,
return csa->read(offset, true, false);
}
+static inline u64 csdev_access_relaxed_read_pair(struct csdev_access *csa,
+ u32 lo_offset, u32 hi_offset)
+{
+ if (likely(csa->io_mem)) {
+ return readl_relaxed(csa->base + lo_offset) |
+ ((u64)readl_relaxed(csa->base + hi_offset) << 32);
+ }
+
+ return csa->read(lo_offset, true, false) | (csa->read(hi_offset, true, false) << 32);
+}
+
+static inline void csdev_access_relaxed_write_pair(struct csdev_access *csa, u64 val,
+ u32 lo_offset, u32 hi_offset)
+{
+ if (likely(csa->io_mem)) {
+ writel_relaxed((u32)val, csa->base + lo_offset);
+ writel_relaxed((u32)(val >> 32), csa->base + hi_offset);
+ } else {
+ csa->write((u32)val, lo_offset, true, false);
+ csa->write((u32)(val >> 32), hi_offset, true, false);
+ }
+}
+
static inline u32 csdev_access_read32(struct csdev_access *csa, u32 offset)
{
if (likely(csa->io_mem))
diff --git a/include/linux/counter.h b/include/linux/counter.h
index 1fe17f5adb09..b63746637de2 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -31,6 +31,8 @@ enum counter_comp_type {
COUNTER_COMP_ENUM,
COUNTER_COMP_COUNT_DIRECTION,
COUNTER_COMP_COUNT_MODE,
+ COUNTER_COMP_SIGNAL_POLARITY,
+ COUNTER_COMP_ARRAY,
};
/**
@@ -38,66 +40,114 @@ enum counter_comp_type {
* @type: Counter component data type
* @name: device-specific component name
* @priv: component-relevant data
- * @action_read: Synapse action mode read callback. The read value of the
+ * @action_read: Synapse action mode read callback. The read value of the
* respective Synapse action mode should be passed back via
* the action parameter.
- * @device_u8_read: Device u8 component read callback. The read value of the
+ * @device_u8_read: Device u8 component read callback. The read value of the
* respective Device u8 component should be passed back via
* the val parameter.
- * @count_u8_read: Count u8 component read callback. The read value of the
+ * @count_u8_read: Count u8 component read callback. The read value of the
* respective Count u8 component should be passed back via
* the val parameter.
- * @signal_u8_read: Signal u8 component read callback. The read value of the
+ * @signal_u8_read: Signal u8 component read callback. The read value of the
* respective Signal u8 component should be passed back via
* the val parameter.
- * @device_u32_read: Device u32 component read callback. The read value of
+ * @device_u32_read: Device u32 component read callback. The read value of
* the respective Device u32 component should be passed
* back via the val parameter.
- * @count_u32_read: Count u32 component read callback. The read value of the
+ * @count_u32_read: Count u32 component read callback. The read value of the
* respective Count u32 component should be passed back via
* the val parameter.
- * @signal_u32_read: Signal u32 component read callback. The read value of
+ * @signal_u32_read: Signal u32 component read callback. The read value of
* the respective Signal u32 component should be passed
* back via the val parameter.
- * @device_u64_read: Device u64 component read callback. The read value of
+ * @device_u64_read: Device u64 component read callback. The read value of
* the respective Device u64 component should be passed
* back via the val parameter.
- * @count_u64_read: Count u64 component read callback. The read value of the
+ * @count_u64_read: Count u64 component read callback. The read value of the
* respective Count u64 component should be passed back via
* the val parameter.
- * @signal_u64_read: Signal u64 component read callback. The read value of
+ * @signal_u64_read: Signal u64 component read callback. The read value of
* the respective Signal u64 component should be passed
* back via the val parameter.
- * @action_write: Synapse action mode write callback. The write value of
+ * @signal_array_u32_read: Signal u32 array component read callback. The
+ *				index of the respective Signal u32 array
+ * component element is passed via the idx
+ * parameter. The read value of the respective
+ *				Signal u32 array component element should be
+ * passed back via the val parameter.
+ * @device_array_u64_read: Device u64 array component read callback. The
+ * index of the respective Device u64 array
+ * component element is passed via the idx
+ * parameter. The read value of the respective
+ * Device u64 array component element should be
+ * passed back via the val parameter.
+ * @count_array_u64_read: Count u64 array component read callback. The
+ * index of the respective Count u64 array
+ * component element is passed via the idx
+ * parameter. The read value of the respective
+ * Count u64 array component element should be
+ * passed back via the val parameter.
+ * @signal_array_u64_read: Signal u64 array component read callback. The
+ *				index of the respective Signal u64 array
+ * component element is passed via the idx
+ * parameter. The read value of the respective
+ *				Signal u64 array component element should be
+ * passed back via the val parameter.
+ * @action_write: Synapse action mode write callback. The write value of
* the respective Synapse action mode is passed via the
* action parameter.
- * @device_u8_write: Device u8 component write callback. The write value of
+ * @device_u8_write: Device u8 component write callback. The write value of
* the respective Device u8 component is passed via the val
* parameter.
- * @count_u8_write: Count u8 component write callback. The write value of
+ * @count_u8_write: Count u8 component write callback. The write value of
* the respective Count u8 component is passed via the val
* parameter.
- * @signal_u8_write: Signal u8 component write callback. The write value of
+ * @signal_u8_write: Signal u8 component write callback. The write value of
* the respective Signal u8 component is passed via the val
* parameter.
- * @device_u32_write: Device u32 component write callback. The write value of
+ * @device_u32_write: Device u32 component write callback. The write value of
* the respective Device u32 component is passed via the
* val parameter.
- * @count_u32_write: Count u32 component write callback. The write value of
+ * @count_u32_write: Count u32 component write callback. The write value of
* the respective Count u32 component is passed via the val
* parameter.
- * @signal_u32_write: Signal u32 component write callback. The write value of
+ * @signal_u32_write: Signal u32 component write callback. The write value of
* the respective Signal u32 component is passed via the
* val parameter.
- * @device_u64_write: Device u64 component write callback. The write value of
+ * @device_u64_write: Device u64 component write callback. The write value of
* the respective Device u64 component is passed via the
* val parameter.
- * @count_u64_write: Count u64 component write callback. The write value of
+ * @count_u64_write: Count u64 component write callback. The write value of
* the respective Count u64 component is passed via the val
* parameter.
- * @signal_u64_write: Signal u64 component write callback. The write value of
+ * @signal_u64_write: Signal u64 component write callback. The write value of
* the respective Signal u64 component is passed via the
* val parameter.
+ * @signal_array_u32_write: Signal u32 array component write callback. The
+ * index of the respective Signal u32 array
+ * component element is passed via the idx
+ * parameter. The write value of the respective
+ * Signal u32 array component element is passed via
+ * the val parameter.
+ * @device_array_u64_write: Device u64 array component write callback. The
+ * index of the respective Device u64 array
+ * component element is passed via the idx
+ * parameter. The write value of the respective
+ * Device u64 array component element is passed via
+ * the val parameter.
+ * @count_array_u64_write: Count u64 array component write callback. The
+ * index of the respective Count u64 array
+ * component element is passed via the idx
+ * parameter. The write value of the respective
+ * Count u64 array component element is passed via
+ * the val parameter.
+ * @signal_array_u64_write: Signal u64 array component write callback. The
+ * index of the respective Signal u64 array
+ * component element is passed via the idx
+ * parameter. The write value of the respective
+ * Signal u64 array component element is passed via
+ * the val parameter.
*/
struct counter_comp {
enum counter_comp_type type;
@@ -125,6 +175,17 @@ struct counter_comp {
struct counter_count *count, u64 *val);
int (*signal_u64_read)(struct counter_device *counter,
struct counter_signal *signal, u64 *val);
+ int (*signal_array_u32_read)(struct counter_device *counter,
+ struct counter_signal *signal,
+ size_t idx, u32 *val);
+ int (*device_array_u64_read)(struct counter_device *counter,
+ size_t idx, u64 *val);
+ int (*count_array_u64_read)(struct counter_device *counter,
+ struct counter_count *count,
+ size_t idx, u64 *val);
+ int (*signal_array_u64_read)(struct counter_device *counter,
+ struct counter_signal *signal,
+ size_t idx, u64 *val);
};
union {
int (*action_write)(struct counter_device *counter,
@@ -148,6 +209,17 @@ struct counter_comp {
struct counter_count *count, u64 val);
int (*signal_u64_write)(struct counter_device *counter,
struct counter_signal *signal, u64 val);
+ int (*signal_array_u32_write)(struct counter_device *counter,
+ struct counter_signal *signal,
+ size_t idx, u32 val);
+ int (*device_array_u64_write)(struct counter_device *counter,
+ size_t idx, u64 val);
+ int (*count_array_u64_write)(struct counter_device *counter,
+ struct counter_count *count,
+ size_t idx, u64 val);
+ int (*signal_array_u64_write)(struct counter_device *counter,
+ struct counter_signal *signal,
+ size_t idx, u64 val);
};
};
@@ -452,6 +524,59 @@ struct counter_available {
.priv = &(_available), \
}
+struct counter_array {
+ enum counter_comp_type type;
+ const struct counter_available *avail;
+ union {
+ size_t length;
+ size_t idx;
+ };
+};
+
+#define DEFINE_COUNTER_ARRAY_U64(_name, _length) \
+ struct counter_array _name = { \
+ .type = COUNTER_COMP_U64, \
+ .length = (_length), \
+ }
+
+#define DEFINE_COUNTER_ARRAY_CAPTURE(_name, _length) \
+ DEFINE_COUNTER_ARRAY_U64(_name, _length)
+
+#define DEFINE_COUNTER_ARRAY_POLARITY(_name, _available, _length) \
+ struct counter_array _name = { \
+ .type = COUNTER_COMP_SIGNAL_POLARITY, \
+ .avail = &(_available), \
+ .length = (_length), \
+ }
+
+#define COUNTER_COMP_DEVICE_ARRAY_U64(_name, _read, _write, _array) \
+{ \
+ .type = COUNTER_COMP_ARRAY, \
+ .name = (_name), \
+ .device_array_u64_read = (_read), \
+ .device_array_u64_write = (_write), \
+ .priv = &(_array), \
+}
+#define COUNTER_COMP_COUNT_ARRAY_U64(_name, _read, _write, _array) \
+{ \
+ .type = COUNTER_COMP_ARRAY, \
+ .name = (_name), \
+ .count_array_u64_read = (_read), \
+ .count_array_u64_write = (_write), \
+ .priv = &(_array), \
+}
+#define COUNTER_COMP_SIGNAL_ARRAY_U64(_name, _read, _write, _array) \
+{ \
+ .type = COUNTER_COMP_ARRAY, \
+ .name = (_name), \
+ .signal_array_u64_read = (_read), \
+ .signal_array_u64_write = (_write), \
+ .priv = &(_array), \
+}
+
+#define COUNTER_COMP_CAPTURE(_read, _write) \
+ COUNTER_COMP_COUNT_U64("capture", _read, _write)
+
#define COUNTER_COMP_CEILING(_read, _write) \
COUNTER_COMP_COUNT_U64("ceiling", _read, _write)
@@ -477,10 +602,31 @@ struct counter_available {
#define COUNTER_COMP_FLOOR(_read, _write) \
COUNTER_COMP_COUNT_U64("floor", _read, _write)
+#define COUNTER_COMP_POLARITY(_read, _write, _available) \
+{ \
+ .type = COUNTER_COMP_SIGNAL_POLARITY, \
+ .name = "polarity", \
+ .signal_u32_read = (_read), \
+ .signal_u32_write = (_write), \
+ .priv = &(_available), \
+}
+
#define COUNTER_COMP_PRESET(_read, _write) \
COUNTER_COMP_COUNT_U64("preset", _read, _write)
#define COUNTER_COMP_PRESET_ENABLE(_read, _write) \
COUNTER_COMP_COUNT_BOOL("preset_enable", _read, _write)
+#define COUNTER_COMP_ARRAY_CAPTURE(_read, _write, _array) \
+ COUNTER_COMP_COUNT_ARRAY_U64("capture", _read, _write, _array)
+
+#define COUNTER_COMP_ARRAY_POLARITY(_read, _write, _array) \
+{ \
+ .type = COUNTER_COMP_ARRAY, \
+ .name = "polarity", \
+ .signal_array_u32_read = (_read), \
+ .signal_array_u32_write = (_write), \
+ .priv = &(_array), \
+}
+
#endif /* _COUNTER_H_ */
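
As an illustration of the new array components, a driver could expose a per-Count capture array like this (the my_* names and the element count are hypothetical):

#define MY_NR_CAPTURES	4

static int my_capture_read(struct counter_device *counter,
			   struct counter_count *count, size_t idx, u64 *val);
static int my_capture_write(struct counter_device *counter,
			    struct counter_count *count, size_t idx, u64 val);

static DEFINE_COUNTER_ARRAY_CAPTURE(my_capture_array, MY_NR_CAPTURES);

static struct counter_comp my_count_ext[] = {
	COUNTER_COMP_ARRAY_CAPTURE(my_capture_read, my_capture_write,
				   my_capture_array),
};
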
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index d5595d57f4e5..6a94a6eaad27 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1110,10 +1110,10 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
}
static inline int parse_perf_domain(int cpu, const char *list_name,
- const char *cell_name)
+ const char *cell_name,
+ struct of_phandle_args *args)
{
struct device_node *cpu_np;
- struct of_phandle_args args;
int ret;
cpu_np = of_cpu_device_node_get(cpu);
@@ -1121,41 +1121,44 @@ static inline int parse_perf_domain(int cpu, const char *list_name,
return -ENODEV;
ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0,
- &args);
+ args);
if (ret < 0)
return ret;
of_node_put(cpu_np);
- return args.args[0];
+ return 0;
}
static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
- const char *cell_name, struct cpumask *cpumask)
+ const char *cell_name, struct cpumask *cpumask,
+ struct of_phandle_args *pargs)
{
- int target_idx;
int cpu, ret;
+ struct of_phandle_args args;
- ret = parse_perf_domain(pcpu, list_name, cell_name);
+ ret = parse_perf_domain(pcpu, list_name, cell_name, pargs);
if (ret < 0)
return ret;
- target_idx = ret;
cpumask_set_cpu(pcpu, cpumask);
for_each_possible_cpu(cpu) {
if (cpu == pcpu)
continue;
- ret = parse_perf_domain(cpu, list_name, cell_name);
+ ret = parse_perf_domain(cpu, list_name, cell_name, &args);
if (ret < 0)
continue;
- if (target_idx == ret)
+ if (pargs->np == args.np && pargs->args_count == args.args_count &&
+ !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count))
cpumask_set_cpu(cpu, cpumask);
+
+ of_node_put(args.np);
}
- return target_idx;
+ return 0;
}
#else
static inline int cpufreq_boost_trigger_state(int state)
@@ -1185,7 +1188,8 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
}
static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
- const char *cell_name, struct cpumask *cpumask)
+ const char *cell_name, struct cpumask *cpumask,
+ struct of_phandle_args *pargs)
{
return -EOPNOTSUPP;
}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index f61447913db9..6c6859bfc454 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -69,6 +69,7 @@ enum cpuhp_state {
CPUHP_X86_APB_DEAD,
CPUHP_X86_MCE_DEAD,
CPUHP_VIRT_NET_DEAD,
+ CPUHP_IBMVNIC_DEAD,
CPUHP_SLUB_DEAD,
CPUHP_DEBUG_OBJ_DEAD,
CPUHP_MM_WRITEBACK_DEAD,
@@ -140,6 +141,7 @@ enum cpuhp_state {
*/
CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE,
+ CPUHP_AP_CACHECTRL_STARTING,
CPUHP_AP_SCHED_STARTING,
CPUHP_AP_RCUTREE_DYING,
CPUHP_AP_CPU_PM_STARTING,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 0d435d0edbcb..c2aa0aa26b45 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -35,19 +35,23 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
*/
#define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp)
-#if NR_CPUS == 1
-#define nr_cpu_ids 1U
+#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
+#define nr_cpu_ids ((unsigned int)NR_CPUS)
#else
extern unsigned int nr_cpu_ids;
#endif
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also,
- * not all bits may be allocated. */
-#define nr_cpumask_bits nr_cpu_ids
+static inline void set_nr_cpu_ids(unsigned int nr)
+{
+#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
+ WARN_ON(nr != nr_cpu_ids);
#else
-#define nr_cpumask_bits ((unsigned int)NR_CPUS)
+ nr_cpu_ids = nr;
#endif
+}
+
+/* Deprecated. Always use nr_cpu_ids. */
+#define nr_cpumask_bits nr_cpu_ids
/*
* The following particular system cpumasks and operations manage
@@ -67,10 +71,6 @@ extern unsigned int nr_cpu_ids;
* cpu_online_mask is the dynamic subset of cpu_present_mask,
* indicating those CPUs available for scheduling.
*
- * If HOTPLUG is enabled, then cpu_possible_mask is forced to have
- * all NR_CPUS bits set, otherwise it is just the set of CPUs that
- * ACPI reports present at boot.
- *
* If HOTPLUG is enabled, then cpu_present_mask varies dynamically,
* depending on what ACPI reports as currently plugged in, otherwise
* cpu_present_mask is just a copy of cpu_possible_mask.
@@ -202,12 +202,13 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
return 0;
}
-static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
- const struct cpumask *src2p) {
+static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p,
+ const struct cpumask *src2p)
+{
return cpumask_first_and(src1p, src2p);
}
-static inline int cpumask_any_distribute(const struct cpumask *srcp)
+static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp)
{
return cpumask_first(srcp);
}
@@ -245,9 +246,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
* After the loop, cpu is >= nr_cpu_ids.
*/
#define for_each_cpu(cpu, mask) \
- for ((cpu) = -1; \
- (cpu) = cpumask_next((cpu), (mask)), \
- (cpu) < nr_cpu_ids;)
+ for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
/**
* for_each_cpu_not - iterate over every cpu in a complemented mask
@@ -257,11 +256,28 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
* After the loop, cpu is >= nr_cpu_ids.
*/
#define for_each_cpu_not(cpu, mask) \
- for ((cpu) = -1; \
- (cpu) = cpumask_next_zero((cpu), (mask)), \
- (cpu) < nr_cpu_ids;)
+ for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
+
+#if NR_CPUS == 1
+static inline
+unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
+{
+ cpumask_check(start);
+ if (n != -1)
+ cpumask_check(n);
+ /*
+ * Return the first available CPU when wrapping, or when starting before cpu0,
+ * since there is only one valid option.
+ */
+ if (wrap && n >= 0)
+ return nr_cpumask_bits;
+
+ return cpumask_first(mask);
+}
+#else
unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
+#endif
/**
* for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
@@ -273,10 +289,8 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
*
* After the loop, cpu is >= nr_cpu_ids.
*/
-#define for_each_cpu_wrap(cpu, mask, start) \
- for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \
- (cpu) < nr_cpumask_bits; \
- (cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
+#define for_each_cpu_wrap(cpu, mask, start) \
+ for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start)
/**
* for_each_cpu_and - iterate over every cpu in both masks
@@ -293,9 +307,25 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
* After the loop, cpu is >= nr_cpu_ids.
*/
#define for_each_cpu_and(cpu, mask1, mask2) \
- for ((cpu) = -1; \
- (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \
- (cpu) < nr_cpu_ids;)
+ for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)
+
+/**
+ * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding
+ * those present in another.
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask1: the first cpumask pointer
+ * @mask2: the second cpumask pointer
+ *
+ * This saves a temporary CPU mask in many places. It is equivalent to:
+ * struct cpumask tmp;
+ * cpumask_andnot(&tmp, &mask1, &mask2);
+ * for_each_cpu(cpu, &tmp)
+ * ...
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_andnot(cpu, mask1, mask2) \
+ for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)
/**
* cpumask_any_but - return a "random" in a cpumask, but not this one.
@@ -317,6 +347,50 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
return i;
}
+/**
+ * cpumask_nth - get the Nth cpu in a cpumask
+ * @srcp: the cpumask pointer
+ * @cpu: the N'th cpu to find, starting from 0
+ *
+ * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ */
+static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
+{
+ return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu));
+}
+
+/**
+ * cpumask_nth_and - get the Nth cpu in 2 cpumasks
+ * @srcp1: the cpumask pointer
+ * @srcp2: the cpumask pointer
+ * @cpu: the N'th cpu to find, starting from 0
+ *
+ * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ */
+static inline
+unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
+{
+ return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
+ nr_cpumask_bits, cpumask_check(cpu));
+}
+
+/**
+ * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd.
+ * @srcp1: the cpumask pointer
+ * @srcp2: the cpumask pointer
+ * @cpu: the N'th cpu to find, starting from 0
+ *
+ * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ */
+static inline
+unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
+{
+ return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
+ nr_cpumask_bits, cpumask_check(cpu));
+}
+
#define CPU_BITS_NONE \
{ \
[0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
@@ -567,6 +641,17 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
}
/**
+ * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2)
+ * @srcp1: the cpumask to count bits (< nr_cpu_ids) in.
+ * @srcp2: the cpumask to count bits (< nr_cpu_ids) in.
+ */
+static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
+{
+ return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
+}
+
+/**
* cpumask_shift_right - *dstp = *srcp >> n
* @dstp: the cpumask result
* @srcp: the input to shift
@@ -1107,9 +1192,10 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
* cover a worst-case of every other cpu being on one of two nodes for a
* very large NR_CPUS.
*
- * Use PAGE_SIZE as a minimum for smaller configurations.
+ * Use PAGE_SIZE as a minimum for smaller configurations while avoiding
+ * unsigned comparison to -1.
*/
-#define CPUMAP_FILE_MAX_BYTES ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \
+#define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \
? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE)
#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE)
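
A brief sketch of the additions most likely to appear in callers, for_each_cpu_andnot() and cpumask_nth_and(); the mask roles below are placeholders:

#include <linux/cpumask.h>

static unsigned int pick_worker_cpu(const struct cpumask *allowed,
				    const struct cpumask *busy)
{
	unsigned int cpu;

	/* Visit CPUs that are allowed but not busy, without a temporary mask. */
	for_each_cpu_andnot(cpu, allowed, busy)
		return cpu;

	/* Otherwise take the second allowed-and-online CPU, if one exists. */
	cpu = cpumask_nth_and(1, allowed, cpu_online_mask);
	return cpu < nr_cpu_ids ? cpu : cpumask_first(allowed);
}
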
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 2324ab6f1846..5d1e961f810e 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -714,11 +714,6 @@ static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags)
tfm->crt_flags &= ~flags;
}
-static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
-{
- return tfm->__crt_ctx;
-}
-
static inline unsigned int crypto_tfm_ctx_alignment(void)
{
struct crypto_tfm *tfm;
diff --git a/include/linux/cxl_err.h b/include/linux/cxl_err.h
new file mode 100644
index 000000000000..629e1bdeda44
--- /dev/null
+++ b/include/linux/cxl_err.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
+ */
+
+#ifndef LINUX_CXL_ERR_H
+#define LINUX_CXL_ERR_H
+
+/* CXL RAS Capability Structure, CXL v3.1 sec 8.2.4.16 */
+struct cxl_ras_capability_regs {
+ u32 uncor_status;
+ u32 uncor_mask;
+ u32 uncor_severity;
+ u32 cor_status;
+ u32 cor_mask;
+ u32 cap_control;
+ u32 header_log[16];
+};
+
+#endif /* LINUX_CXL_ERR_H */
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 7b1f4a488230..ad15a5b88e3a 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -21,7 +21,7 @@
/* Get a random number in [l, r) */
static inline unsigned long damon_rand(unsigned long l, unsigned long r)
{
- return l + prandom_u32_max(r - l);
+ return l + get_random_u32_below(r - l);
}
/**
@@ -216,13 +216,26 @@ struct damos_stat {
};
/**
- * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
+ * struct damos_access_pattern - Target access pattern of the given scheme.
* @min_sz_region: Minimum size of target regions.
* @max_sz_region: Maximum size of target regions.
* @min_nr_accesses: Minimum ``->nr_accesses`` of target regions.
* @max_nr_accesses: Maximum ``->nr_accesses`` of target regions.
* @min_age_region: Minimum age of target regions.
* @max_age_region: Maximum age of target regions.
+ */
+struct damos_access_pattern {
+ unsigned long min_sz_region;
+ unsigned long max_sz_region;
+ unsigned int min_nr_accesses;
+ unsigned int max_nr_accesses;
+ unsigned int min_age_region;
+ unsigned int max_age_region;
+};
+
+/**
+ * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
+ * @pattern: Access pattern of target regions.
* @action: &damo_action to be applied to the target regions.
* @quota: Control the aggressiveness of this scheme.
* @wmarks: Watermarks for automated (in)activation of this scheme.
@@ -230,10 +243,8 @@ struct damos_stat {
* @list: List head for siblings.
*
* For each aggregation interval, DAMON finds regions which fit in the
- * condition (&min_sz_region, &max_sz_region, &min_nr_accesses,
- * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to
- * those. To avoid consuming too much CPU time or IO resources for the
- * &action, &quota is used.
+ * &pattern and applies &action to those. To avoid consuming too much
+ * CPU time or IO resources for the &action, &quota is used.
*
* To do the work only when needed, schemes can be activated for specific
* system situations using &wmarks. If all schemes that registered to the
@@ -248,12 +259,7 @@ struct damos_stat {
* &action is applied.
*/
struct damos {
- unsigned long min_sz_region;
- unsigned long max_sz_region;
- unsigned int min_nr_accesses;
- unsigned int max_nr_accesses;
- unsigned int min_age_region;
- unsigned int max_age_region;
+ struct damos_access_pattern pattern;
enum damos_action action;
struct damos_quota quota;
struct damos_watermarks wmarks;
@@ -340,7 +346,7 @@ struct damon_operations {
unsigned long (*apply_scheme)(struct damon_ctx *context,
struct damon_target *t, struct damon_region *r,
struct damos *scheme);
- bool (*target_valid)(void *target);
+ bool (*target_valid)(struct damon_target *t);
void (*cleanup)(struct damon_ctx *context);
};
@@ -351,6 +357,7 @@ struct damon_operations {
* @after_wmarks_check: Called after each schemes' watermarks check.
* @after_sampling: Called after each sampling.
* @after_aggregation: Called after each aggregation.
+ * @before_damos_apply: Called before applying DAMOS action.
* @before_terminate: Called before terminating the monitoring.
* @private: User private data.
*
@@ -379,17 +386,23 @@ struct damon_callback {
int (*after_wmarks_check)(struct damon_ctx *context);
int (*after_sampling)(struct damon_ctx *context);
int (*after_aggregation)(struct damon_ctx *context);
+ int (*before_damos_apply)(struct damon_ctx *context,
+ struct damon_target *target,
+ struct damon_region *region,
+ struct damos *scheme);
void (*before_terminate)(struct damon_ctx *context);
};
/**
- * struct damon_ctx - Represents a context for each monitoring. This is the
- * main interface that allows users to set the attributes and get the results
- * of the monitoring.
+ * struct damon_attrs - Monitoring attributes for accuracy/overhead control.
*
* @sample_interval: The time between access samplings.
* @aggr_interval: The time between monitor results aggregations.
* @ops_update_interval: The time between monitoring operations updates.
+ * @min_nr_regions: The minimum number of adaptive monitoring
+ * regions.
+ * @max_nr_regions: The maximum number of adaptive monitoring
+ * regions.
*
* For each @sample_interval, DAMON checks whether each region is accessed or
* not. It aggregates and keeps the access information (number of accesses to
@@ -399,7 +412,21 @@ struct damon_callback {
* @ops_update_interval. All time intervals are in micro-seconds.
* Please refer to &struct damon_operations and &struct damon_callback for more
* detail.
+ */
+struct damon_attrs {
+ unsigned long sample_interval;
+ unsigned long aggr_interval;
+ unsigned long ops_update_interval;
+ unsigned long min_nr_regions;
+ unsigned long max_nr_regions;
+};
+
+/**
+ * struct damon_ctx - Represents a context for each monitoring. This is the
+ * main interface that allows users to set the attributes and get the results
+ * of the monitoring.
*
+ * @attrs: Monitoring attributes for accuracy/overhead control.
* @kdamond: Kernel thread who does the monitoring.
* @kdamond_lock: Mutex for the synchronizations with @kdamond.
*
@@ -421,15 +448,11 @@ struct damon_callback {
* @ops: Set of monitoring operations for given use cases.
* @callback: Set of callbacks for monitoring events notifications.
*
- * @min_nr_regions: The minimum number of adaptive monitoring regions.
- * @max_nr_regions: The maximum number of adaptive monitoring regions.
* @adaptive_targets: Head of monitoring targets (&damon_target) list.
* @schemes: Head of schemes (&damos) list.
*/
struct damon_ctx {
- unsigned long sample_interval;
- unsigned long aggr_interval;
- unsigned long ops_update_interval;
+ struct damon_attrs attrs;
/* private: internal use only */
struct timespec64 last_aggregation;
@@ -442,8 +465,6 @@ struct damon_ctx {
struct damon_operations ops;
struct damon_callback callback;
- unsigned long min_nr_regions;
- unsigned long max_nr_regions;
struct list_head adaptive_targets;
struct list_head schemes;
};
@@ -463,9 +484,23 @@ static inline struct damon_region *damon_last_region(struct damon_target *t)
return list_last_entry(&t->regions_list, struct damon_region, list);
}
+static inline struct damon_region *damon_first_region(struct damon_target *t)
+{
+ return list_first_entry(&t->regions_list, struct damon_region, list);
+}
+
+static inline unsigned long damon_sz_region(struct damon_region *r)
+{
+ return r->ar.end - r->ar.start;
+}
+
+
#define damon_for_each_region(r, t) \
list_for_each_entry(r, &t->regions_list, list)
+#define damon_for_each_region_from(r, t) \
+ list_for_each_entry_from(r, &t->regions_list, list)
+
#define damon_for_each_region_safe(r, next, t) \
list_for_each_entry_safe(r, next, &t->regions_list, list)
@@ -501,12 +536,9 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t);
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
unsigned int nr_ranges);
-struct damos *damon_new_scheme(
- unsigned long min_sz_region, unsigned long max_sz_region,
- unsigned int min_nr_accesses, unsigned int max_nr_accesses,
- unsigned int min_age_region, unsigned int max_age_region,
- enum damos_action action, struct damos_quota *quota,
- struct damos_watermarks *wmarks);
+struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
+ enum damos_action action, struct damos_quota *quota,
+ struct damos_watermarks *wmarks);
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
void damon_destroy_scheme(struct damos *s);
@@ -519,10 +551,8 @@ unsigned int damon_nr_regions(struct damon_target *t);
struct damon_ctx *damon_new_ctx(void);
void damon_destroy_ctx(struct damon_ctx *ctx);
-int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
- unsigned long aggr_int, unsigned long ops_upd_int,
- unsigned long min_nr_reg, unsigned long max_nr_reg);
-int damon_set_schemes(struct damon_ctx *ctx,
+int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs);
+void damon_set_schemes(struct damon_ctx *ctx,
struct damos **schemes, ssize_t nr_schemes);
int damon_nr_running_ctxs(void);
bool damon_is_registered_ops(enum damon_ops_id id);
@@ -538,6 +568,9 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx)
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_set_region_biggest_system_ram_default(struct damon_target *t,
+ unsigned long *start, unsigned long *end);
+
#endif /* CONFIG_DAMON */
#endif /* _DAMON_H */
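
With the attrs/pattern refactor, callers fill in struct damos_access_pattern and struct damon_attrs and pass them whole; a hedged sketch (the interval values and the DAMOS_PAGEOUT action are only examples):

#include <linux/damon.h>

static int setup_cold_pageout(struct damon_ctx *ctx)
{
	struct damon_attrs attrs = {
		.sample_interval = 5000,		/* 5 ms */
		.aggr_interval = 100000,		/* 100 ms */
		.ops_update_interval = 1000000,		/* 1 s */
		.min_nr_regions = 10,
		.max_nr_regions = 1000,
	};
	struct damos_access_pattern pattern = {
		.min_sz_region = PAGE_SIZE,
		.max_sz_region = ULONG_MAX,
		.min_nr_accesses = 0,
		.max_nr_accesses = 0,			/* never-accessed regions */
		.min_age_region = 10,			/* in aggregation intervals */
		.max_age_region = UINT_MAX,
	};
	struct damos_quota quota = {};
	struct damos_watermarks wmarks = { .metric = DAMOS_WMARK_NONE, };
	struct damos *scheme;

	scheme = damon_new_scheme(&pattern, DAMOS_PAGEOUT, &quota, &wmarks);
	if (!scheme)
		return -ENOMEM;

	damon_set_schemes(ctx, &scheme, 1);
	return damon_set_attrs(ctx, &attrs);
}
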
diff --git a/include/linux/dax.h b/include/linux/dax.h
index ba985333e26b..2b5ecb591059 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -205,6 +205,8 @@ static inline void dax_unlock_mapping_entry(struct address_space *mapping,
}
#endif
+int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
+ const struct iomap_ops *ops);
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
const struct iomap_ops *ops);
int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 92c78ed02b54..6b351e009f59 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -16,6 +16,7 @@
#include <linux/wait.h>
struct path;
+struct file;
struct vfsmount;
/*
@@ -250,7 +251,7 @@ extern struct dentry * d_make_root(struct inode *);
/* <clickety>-<click> the ramfs-type tree */
extern void d_genocide(struct dentry *);
-extern void d_tmpfile(struct dentry *, struct inode *);
+extern void d_tmpfile(struct file *, struct inode *);
extern struct dentry *d_find_alias(struct inode *);
extern void d_prune_aliases(struct inode *);
@@ -287,8 +288,8 @@ static inline unsigned d_count(const struct dentry *dentry)
/*
* helper function for dentry_operations.d_dname() members
*/
-extern __printf(4, 5)
-char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
+extern __printf(3, 4)
+char *dynamic_dname(char *, int, const char *, ...);
extern char *__d_path(const struct path *, const struct path *, char *, int);
extern char *d_absolute_path(const struct path *, char *, int);
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index c869f1e73d75..ea2d919fd9c7 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -45,7 +45,7 @@ struct debugfs_u32_array {
extern struct dentry *arch_debugfs_dir;
-#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \
+#define DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \
static int __fops ## _open(struct inode *inode, struct file *file) \
{ \
__simple_attr_check_format(__fmt, 0ull); \
@@ -56,10 +56,16 @@ static const struct file_operations __fops = { \
.open = __fops ## _open, \
.release = simple_attr_release, \
.read = debugfs_attr_read, \
- .write = debugfs_attr_write, \
+ .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \
.llseek = no_llseek, \
}
+#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \
+ DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false)
+
+#define DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \
+ DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true)
+
typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
#if defined(CONFIG_DEBUG_FS)
@@ -91,6 +97,8 @@ struct dentry *debugfs_create_automount(const char *name,
void debugfs_remove(struct dentry *dentry);
#define debugfs_remove_recursive debugfs_remove
+void debugfs_lookup_and_remove(const char *name, struct dentry *parent);
+
const struct file_operations *debugfs_real_fops(const struct file *filp);
int debugfs_file_get(struct dentry *dentry);
@@ -100,6 +108,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos);
ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos);
+ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos);
struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
struct dentry *new_dir, const char *new_name);
@@ -225,6 +235,10 @@ static inline void debugfs_remove(struct dentry *dentry)
static inline void debugfs_remove_recursive(struct dentry *dentry)
{ }
+static inline void debugfs_lookup_and_remove(const char *name,
+ struct dentry *parent)
+{ }
+
const struct file_operations *debugfs_real_fops(const struct file *filp);
static inline int debugfs_file_get(struct dentry *dentry)
@@ -248,6 +262,13 @@ static inline ssize_t debugfs_attr_write(struct file *file,
return -ENODEV;
}
+static inline ssize_t debugfs_attr_write_signed(struct file *file,
+ const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return -ENODEV;
+}
+
static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
struct dentry *new_dir, char *new_name)
{
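
The _SIGNED variant makes writes of negative values parse correctly; a sketch of a signed debugfs attribute (the foo_* names and the backing variable are made up):

#include <linux/debugfs.h>

static s64 foo_threshold;

static int foo_threshold_get(void *data, u64 *val)
{
	*val = foo_threshold;
	return 0;
}

static int foo_threshold_set(void *data, u64 val)
{
	foo_threshold = val;
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(foo_threshold_fops, foo_threshold_get,
				foo_threshold_set, "%lld\n");

The resulting fops would then be handed to debugfs_create_file_unsafe(), just as with the unsigned variant.
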
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 58aea2d7385c..0da97dba9ef8 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -73,8 +73,8 @@ extern int delayacct_add_tsk(struct taskstats *, struct task_struct *);
extern __u64 __delayacct_blkio_ticks(struct task_struct *);
extern void __delayacct_freepages_start(void);
extern void __delayacct_freepages_end(void);
-extern void __delayacct_thrashing_start(void);
-extern void __delayacct_thrashing_end(void);
+extern void __delayacct_thrashing_start(bool *in_thrashing);
+extern void __delayacct_thrashing_end(bool *in_thrashing);
extern void __delayacct_swapin_start(void);
extern void __delayacct_swapin_end(void);
extern void __delayacct_compact_start(void);
@@ -143,22 +143,22 @@ static inline void delayacct_freepages_end(void)
__delayacct_freepages_end();
}
-static inline void delayacct_thrashing_start(void)
+static inline void delayacct_thrashing_start(bool *in_thrashing)
{
if (!static_branch_unlikely(&delayacct_key))
return;
if (current->delays)
- __delayacct_thrashing_start();
+ __delayacct_thrashing_start(in_thrashing);
}
-static inline void delayacct_thrashing_end(void)
+static inline void delayacct_thrashing_end(bool *in_thrashing)
{
if (!static_branch_unlikely(&delayacct_key))
return;
if (current->delays)
- __delayacct_thrashing_end();
+ __delayacct_thrashing_end(in_thrashing);
}
static inline void delayacct_swapin_start(void)
@@ -237,9 +237,9 @@ static inline void delayacct_freepages_start(void)
{}
static inline void delayacct_freepages_end(void)
{}
-static inline void delayacct_thrashing_start(void)
+static inline void delayacct_thrashing_start(bool *in_thrashing)
{}
-static inline void delayacct_thrashing_end(void)
+static inline void delayacct_thrashing_end(bool *in_thrashing)
{}
static inline void delayacct_swapin_start(void)
{}
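
The thrashing hooks now carry per-call state in a caller-provided bool, so start and end must use the same variable; a minimal sketch of the pairing:

#include <linux/delayacct.h>

static void wait_on_thrashing_event(void)
{
	bool in_thrashing;

	delayacct_thrashing_start(&in_thrashing);
	/* ... block on the folio/writeback condition here ... */
	delayacct_thrashing_end(&in_thrashing);
}
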
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 34aab4dd336c..4dc7cda4fd46 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -152,8 +152,8 @@ struct devfreq_stats {
* @max_state: count of entry present in the frequency table.
* @previous_freq: previously configured frequency value.
* @last_status: devfreq user device info, performance statistics
- * @data: Private data of the governor. The devfreq framework does not
- * touch this.
+ * @data: private data passed from the devfreq driver to governors;
+ *	governors should not change it.
+ * @governor_data: private data for governors, devfreq core doesn't touch it.
* @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs)
* @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs)
* @scaling_min_freq: Limit minimum frequency requested by OPP interface
@@ -193,7 +193,8 @@ struct devfreq {
unsigned long previous_freq;
struct devfreq_dev_status last_status;
- void *data; /* private data for governors */
+ void *data;
+ void *governor_data;
struct dev_pm_qos_request user_min_freq_req;
struct dev_pm_qos_request user_max_freq_req;
diff --git a/include/linux/device.h b/include/linux/device.h
index 424b55df0272..7bcfaf54fea3 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -197,9 +197,9 @@ void devres_remove_group(struct device *dev, void *id);
int devres_release_group(struct device *dev, void *id);
/* managed devm_k.alloc/kfree for device drivers */
-void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc;
+void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2);
void *devm_krealloc(struct device *dev, void *ptr, size_t size,
- gfp_t gfp) __must_check;
+ gfp_t gfp) __must_check __realloc_size(3);
__printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp,
const char *fmt, va_list ap) __malloc;
__printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp,
@@ -226,7 +226,8 @@ static inline void *devm_kcalloc(struct device *dev,
void devm_kfree(struct device *dev, const void *p);
char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc;
const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp);
-void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp);
+void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp)
+ __realloc_size(3);
unsigned long devm_get_free_pages(struct device *dev,
gfp_t gfp_mask, unsigned int order);
@@ -378,10 +379,8 @@ struct dev_links_info {
* @data: Pointer to MSI device data
*/
struct dev_msi_info {
-#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
- struct irq_domain *domain;
-#endif
#ifdef CONFIG_GENERIC_MSI_IRQ
+ struct irq_domain *domain;
struct msi_device_data *data;
#endif
};
@@ -742,7 +741,7 @@ static inline void set_dev_node(struct device *dev, int node)
static inline struct irq_domain *dev_get_msi_domain(const struct device *dev)
{
-#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+#ifdef CONFIG_GENERIC_MSI_IRQ
return dev->msi.domain;
#else
return NULL;
@@ -751,7 +750,7 @@ static inline struct irq_domain *dev_get_msi_domain(const struct device *dev)
static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d)
{
-#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+#ifdef CONFIG_GENERIC_MSI_IRQ
dev->msi.domain = d;
#endif
}
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index 7acaabde5396..2114d65b862f 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -242,6 +242,7 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev)
extern int driver_deferred_probe_timeout;
void driver_deferred_probe_add(struct device *dev);
+int driver_deferred_probe_check_state(struct device *dev);
void driver_init(void);
/**
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index ff951e9f6f20..c6bc2b5ee7e6 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -56,9 +56,6 @@ struct dlm_lockspace_ops {
* DLM_LSFL_TIMEWARN
* The dlm should emit netlink messages if locks have been waiting
* for a configurable amount of time. (Unused.)
- * DLM_LSFL_FS
- * The lockspace user is in the kernel (i.e. filesystem). Enables
- * direct bast/cast callbacks.
* DLM_LSFL_NEWEXCL
* dlm_new_lockspace() should return -EEXIST if the lockspace exists.
*
@@ -134,7 +131,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
int mode,
struct dlm_lksb *lksb,
uint32_t flags,
- void *name,
+ const void *name,
unsigned int namelen,
uint32_t parent_lkid,
void (*lockast) (void *astarg),
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 71731796c8c3..6fa8d4e29719 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -327,15 +327,6 @@ struct dma_buf {
const struct dma_buf_ops *ops;
/**
- * @lock:
- *
- * Used internally to serialize list manipulation, attach/detach and
- * vmap/unmap. Note that in many cases this is superseeded by
- * dma_resv_lock() on @resv.
- */
- struct mutex lock;
-
- /**
* @vmapping_counter:
*
* Used internally to refcnt the vmaps returned by dma_buf_vmap().
@@ -627,9 +618,17 @@ int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction dir);
int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction dir);
+struct sg_table *
+dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach,
+ enum dma_data_direction direction);
+void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach,
+ struct sg_table *sg_table,
+ enum dma_data_direction direction);
int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
unsigned long);
int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map);
void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map);
+int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
+void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
#endif /* __DMA_BUF_H__ */
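
The new *_unlocked helpers take the reservation lock themselves, so they are meant for importers that do not already hold it; an illustrative sketch of mapping and unmapping an attachment:

#include <linux/dma-buf.h>

static int importer_map_once(struct dma_buf_attachment *attach)
{
	struct sg_table *sgt;

	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);

	/* ... program the device with sgt here ... */

	dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
	return 0;
}
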
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
deleted file mode 100644
index 24607dc3c2ac..000000000000
--- a/include/linux/dma-iommu.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2014-2015 ARM Ltd.
- */
-#ifndef __DMA_IOMMU_H
-#define __DMA_IOMMU_H
-
-#include <linux/errno.h>
-#include <linux/types.h>
-
-#ifdef CONFIG_IOMMU_DMA
-#include <linux/dma-mapping.h>
-#include <linux/iommu.h>
-#include <linux/msi.h>
-
-/* Domain management interface for IOMMU drivers */
-int iommu_get_dma_cookie(struct iommu_domain *domain);
-int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
-void iommu_put_dma_cookie(struct iommu_domain *domain);
-
-/* Setup call for arch DMA mapping code */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
-int iommu_dma_init_fq(struct iommu_domain *domain);
-
-/* The DMA API isn't _quite_ the whole story, though... */
-/*
- * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU device
- *
- * The MSI page will be stored in @desc.
- *
- * Return: 0 on success otherwise an error describing the failure.
- */
-int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
-
-/* Update the MSI message if required. */
-void iommu_dma_compose_msi_msg(struct msi_desc *desc,
- struct msi_msg *msg);
-
-void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
-
-void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
- struct iommu_domain *domain);
-
-extern bool iommu_dma_forcedac;
-
-#else /* CONFIG_IOMMU_DMA */
-
-struct iommu_domain;
-struct msi_desc;
-struct msi_msg;
-struct device;
-
-static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base,
- u64 dma_limit)
-{
-}
-
-static inline int iommu_dma_init_fq(struct iommu_domain *domain)
-{
- return -EINVAL;
-}
-
-static inline int iommu_get_dma_cookie(struct iommu_domain *domain)
-{
- return -ENODEV;
-}
-
-static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
-{
- return -ENODEV;
-}
-
-static inline void iommu_put_dma_cookie(struct iommu_domain *domain)
-{
-}
-
-static inline int iommu_dma_prepare_msi(struct msi_desc *desc,
- phys_addr_t msi_addr)
-{
- return 0;
-}
-
-static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc,
- struct msi_msg *msg)
-{
-}
-
-static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
-{
-}
-
-#endif /* CONFIG_IOMMU_DMA */
-#endif /* __DMA_IOMMU_H */
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 25a30906289d..0ee20b764000 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -139,7 +139,6 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
bool dma_can_mmap(struct device *dev);
-int dma_supported(struct device *dev, u64 mask);
bool dma_pci_p2pdma_supported(struct device *dev);
int dma_set_mask(struct device *dev, u64 mask);
int dma_set_coherent_mask(struct device *dev, u64 mask);
@@ -248,10 +247,6 @@ static inline bool dma_can_mmap(struct device *dev)
{
return false;
}
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- return 0;
-}
static inline bool dma_pci_p2pdma_supported(struct device *dev)
{
return false;
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index c8ccbc94d5d2..0637659a702c 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -62,6 +62,11 @@ struct dma_resv_list;
* For example when asking for WRITE fences then the KERNEL fences are returned
* as well. Similar when asked for READ fences then both WRITE and KERNEL
* fences are returned as well.
+ *
+ * Already used fences can be promoted in the sense that a fence with
+ * DMA_RESV_USAGE_BOOKKEEP could become DMA_RESV_USAGE_READ by adding it again
+ * with this usage. But fences can never be degraded in the sense that a fence
+ * with DMA_RESV_USAGE_WRITE could become DMA_RESV_USAGE_READ.
*/
enum dma_resv_usage {
/**
@@ -98,10 +103,15 @@ enum dma_resv_usage {
* @DMA_RESV_USAGE_BOOKKEEP: No implicit sync.
*
* This should be used by submissions which don't want to participate in
- * implicit synchronization.
+ * any implicit synchronization.
+ *
+ * The most common cases are preemption fences, page table updates, TLB
+ * flushes, as well as explicitly synced user submissions.
*
- * The most common case are preemption fences as well as page table
- * updates and their TLB flushes.
+ * Explicitly synced user submissions can be promoted to
+ * DMA_RESV_USAGE_READ or DMA_RESV_USAGE_WRITE as needed using
+ * dma_buf_import_sync_file() when implicit synchronization becomes
+ * necessary after the fence was initially added.
*/
DMA_RESV_USAGE_BOOKKEEP
};
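A hedged sketch of the promotion rule described above, using dma_resv_add_fence() on an already-locked reservation object with a fence slot reserved beforehand via dma_resv_reserve_fences() (object/field names hypothetical):

	/* first added purely for bookkeeping: no implicit sync */
	dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Later the same fence is added again with READ usage, promoting it
	 * so implicit sync now waits for it.  Re-adding it with BOOKKEEP at
	 * this point would not demote it back.
	 */
	dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);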
diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h
index a6b7bc707356..77ea602c287c 100644
--- a/include/linux/dma/hsu.h
+++ b/include/linux/dma/hsu.h
@@ -8,11 +8,13 @@
#ifndef _DMA_HSU_H
#define _DMA_HSU_H
-#include <linux/device.h>
-#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/kconfig.h>
+#include <linux/types.h>
#include <linux/platform_data/dma-hsu.h>
+struct device;
struct hsu_dma;
/**
diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 3ed117e299ec..f3664ee12170 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -5,28 +5,8 @@
#ifndef _NET_DSA_8021Q_H
#define _NET_DSA_8021Q_H
-#include <linux/refcount.h>
-#include <linux/types.h>
#include <net/dsa.h>
-
-struct dsa_switch;
-struct dsa_port;
-struct sk_buff;
-struct net_device;
-
-struct dsa_tag_8021q_vlan {
- struct list_head list;
- int port;
- u16 vid;
- refcount_t refcount;
-};
-
-struct dsa_8021q_context {
- struct dsa_switch *ds;
- struct list_head vlans;
- /* EtherType of RX VID, used for filtering on master interface */
- __be16 proto;
-};
+#include <linux/types.h>
int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto);
@@ -38,15 +18,6 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port,
struct dsa_bridge bridge);
-struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
- u16 tpid, u16 tci);
-
-void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
- int *vbid);
-
-struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master,
- int vbid);
-
u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num);
u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp);
diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h
index 50be7cbd93a5..b1b5720d89a5 100644
--- a/include/linux/dsa/tag_qca.h
+++ b/include/linux/dsa/tag_qca.h
@@ -61,9 +61,9 @@ struct sk_buff;
/* Special struct emulating an Ethernet header */
struct qca_mgmt_ethhdr {
- u32 command; /* command bit 31:0 */
- u32 seq; /* seq 63:32 */
- u32 mdio_data; /* first 4byte mdio */
+ __le32 command; /* command bit 31:0 */
+ __le32 seq; /* seq 63:32 */
+ __le32 mdio_data; /* first 4byte mdio */
__be16 hdr; /* qca hdr */
} __packed;
@@ -73,7 +73,7 @@ enum mdio_cmd {
};
struct mib_ethhdr {
- u32 data[3]; /* first 3 mib counter */
+ __le32 data[3]; /* first 3 mib counter */
__be16 hdr; /* qca hdr */
} __packed;
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index dce631e678dd..41682278d2e8 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -6,6 +6,8 @@
#include <linux/jump_label.h>
#endif
+#include <linux/build_bug.h>
+
/*
* An instance of this structure is created in a special
* ELF section at every dynamic debug callsite. At runtime,
@@ -21,6 +23,9 @@ struct _ddebug {
const char *filename;
const char *format;
unsigned int lineno:18;
+#define CLS_BITS 6
+ unsigned int class_id:CLS_BITS;
+#define _DPRINTK_CLASS_DFLT ((1 << CLS_BITS) - 1)
/*
* The flags field controls the behaviour at the callsite.
* The bits here are changed dynamically when the user
@@ -51,15 +56,82 @@ struct _ddebug {
#endif
} __attribute__((aligned(8)));
-
+enum class_map_type {
+ DD_CLASS_TYPE_DISJOINT_BITS,
+ /**
+ * DD_CLASS_TYPE_DISJOINT_BITS: classes are independent, one per bit,
+ * expecting hex input. Built for drm.debug, basis for other types.
+ */
+ DD_CLASS_TYPE_LEVEL_NUM,
+ /**
+ * DD_CLASS_TYPE_LEVEL_NUM: input is numeric level, 0-N.
+ * N turns on just bits N-1 .. 0, so N=0 turns all bits off.
+ */
+ DD_CLASS_TYPE_DISJOINT_NAMES,
+ /**
+ * DD_CLASS_TYPE_DISJOINT_NAMES: input is a CSV of [+-]CLASS_NAMES,
+ * classes are independent, like _DISJOINT_BITS.
+ */
+ DD_CLASS_TYPE_LEVEL_NAMES,
+ /**
+ * DD_CLASS_TYPE_LEVEL_NAMES: input is a CSV of [+-]CLASS_NAMES,
+ * intended for names like: INFO,DEBUG,TRACE, with a module prefix.
+ * Avoid EMERG,ALERT,CRIT,ERR,WARNING: they are not debug categories.
+ */
+};
+
+struct ddebug_class_map {
+ struct list_head link;
+ struct module *mod;
+ const char *mod_name; /* needed for builtins */
+ const char **class_names;
+ const int length;
+ const int base; /* index of 1st .class_id, allows split/shared space */
+ enum class_map_type map_type;
+};
+
+/**
+ * DECLARE_DYNDBG_CLASSMAP - declare classnames known by a module
+ * @_var: a struct ddebug_class_map, passed to module_param_cb
+ * @_maptype: enum class_map_type, chooses bits/verbose, numeric/symbolic
+ * @_base: offset of the 1st class-name; splits the .class_id space
+ * @classes: class-names used to control class'd prdbgs
+ */
+#define DECLARE_DYNDBG_CLASSMAP(_var, _maptype, _base, ...) \
+ static const char *_var##_classnames[] = { __VA_ARGS__ }; \
+ static struct ddebug_class_map __aligned(8) __used \
+ __section("__dyndbg_classes") _var = { \
+ .mod = THIS_MODULE, \
+ .mod_name = KBUILD_MODNAME, \
+ .base = _base, \
+ .map_type = _maptype, \
+ .length = NUM_TYPE_ARGS(char*, __VA_ARGS__), \
+ .class_names = _var##_classnames, \
+ }
+#define NUM_TYPE_ARGS(eltype, ...) \
+ (sizeof((eltype[]){__VA_ARGS__}) / sizeof(eltype))
+
+/* encapsulate linker provided built-in (or module) dyndbg data */
+struct _ddebug_info {
+ struct _ddebug *descs;
+ struct ddebug_class_map *classes;
+ unsigned int num_descs;
+ unsigned int num_classes;
+};
+
+struct ddebug_class_param {
+ union {
+ unsigned long *bits;
+ unsigned int *lvl;
+ };
+ char flags[8];
+ const struct ddebug_class_map *map;
+};
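Putting the pieces above together, a module wanting drm.debug-style class'd debug could declare a classmap and expose it as a module parameter roughly like this (hypothetical names, sketched from how drm wires it up):

	/* three independent classes, controlled by a hex bitmap, ids 0..2 */
	DECLARE_DYNDBG_CLASSMAP(my_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
				"MY_CORE", "MY_IO", "MY_TRACE");

	static unsigned long my_debug_bits;
	static struct ddebug_class_param my_debug_param = {
		.bits	= &my_debug_bits,
		.flags	= "p",
		.map	= &my_debug_classes,
	};
	module_param_cb(debug, &param_ops_dyndbg_classes, &my_debug_param, 0600);

Class'd callsites would then be emitted through dynamic_pr_debug_cls() (or __pr_debug_cls() in test code), passing one of the class ids declared above.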
#if defined(CONFIG_DYNAMIC_DEBUG_CORE)
-/* exported for module authors to exercise >control */
-int dynamic_debug_exec_queries(const char *query, const char *modname);
+int ddebug_add_module(struct _ddebug_info *dyndbg, const char *modname);
-int ddebug_add_module(struct _ddebug *tab, unsigned int n,
- const char *modname);
extern int ddebug_remove_module(const char *mod_name);
extern __printf(2, 3)
void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...);
@@ -87,7 +159,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
const struct ib_device *ibdev,
const char *fmt, ...);
-#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \
+#define DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, cls, fmt) \
static struct _ddebug __aligned(8) \
__section("__dyndbg") name = { \
.modname = KBUILD_MODNAME, \
@@ -96,8 +168,14 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
.format = (fmt), \
.lineno = __LINE__, \
.flags = _DPRINTK_FLAGS_DEFAULT, \
+ .class_id = cls, \
_DPRINTK_KEY_INIT \
- }
+ }; \
+ BUILD_BUG_ON_MSG(cls > _DPRINTK_CLASS_DFLT, \
+ "classid value overflow")
+
+#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \
+ DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, _DPRINTK_CLASS_DFLT, fmt)
#ifdef CONFIG_JUMP_LABEL
@@ -128,17 +206,34 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
#endif /* CONFIG_JUMP_LABEL */
-#define __dynamic_func_call(id, fmt, func, ...) do { \
- DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \
- if (DYNAMIC_DEBUG_BRANCH(id)) \
- func(&id, ##__VA_ARGS__); \
-} while (0)
-
-#define __dynamic_func_call_no_desc(id, fmt, func, ...) do { \
- DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \
+/*
+ * Factory macros: ($prefix)dynamic_func_call($suffix)
+ *
+ * Lower layer (with __ prefix) gets the callsite metadata, and wraps
+ * the func inside a debug-branch/static-key construct. Upper layer
+ * (with _ prefix) does the UNIQUE_ID once, so that lower can ref the
+ * name/label multiple times, and tie the elements together.
+ * Multiple flavors:
+ * (|_cls): adds in _DPRINTK_CLASS_DFLT as needed
+ * (|_no_desc): without the suffix, func gets the callsite descriptor as 1st arg (for prdbgs)
+ */
+#define __dynamic_func_call_cls(id, cls, fmt, func, ...) do { \
+ DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \
if (DYNAMIC_DEBUG_BRANCH(id)) \
- func(__VA_ARGS__); \
+ func(&id, ##__VA_ARGS__); \
+} while (0)
+#define __dynamic_func_call(id, fmt, func, ...) \
+ __dynamic_func_call_cls(id, _DPRINTK_CLASS_DFLT, fmt, \
+ func, ##__VA_ARGS__)
+
+#define __dynamic_func_call_cls_no_desc(id, cls, fmt, func, ...) do { \
+ DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(id)) \
+ func(__VA_ARGS__); \
} while (0)
+#define __dynamic_func_call_no_desc(id, fmt, func, ...) \
+ __dynamic_func_call_cls_no_desc(id, _DPRINTK_CLASS_DFLT, \
+ fmt, func, ##__VA_ARGS__)
/*
* "Factory macro" for generating a call to func, guarded by a
@@ -148,22 +243,33 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
* the varargs. Note that fmt is repeated in invocations of this
* macro.
*/
+#define _dynamic_func_call_cls(cls, fmt, func, ...) \
+ __dynamic_func_call_cls(__UNIQUE_ID(ddebug), cls, fmt, func, ##__VA_ARGS__)
#define _dynamic_func_call(fmt, func, ...) \
- __dynamic_func_call(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__)
+ _dynamic_func_call_cls(_DPRINTK_CLASS_DFLT, fmt, func, ##__VA_ARGS__)
+
/*
* A variant that does the same, except that the descriptor is not
* passed as the first argument to the function; it is only called
* with precisely the macro's varargs.
*/
-#define _dynamic_func_call_no_desc(fmt, func, ...) \
- __dynamic_func_call_no_desc(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__)
+#define _dynamic_func_call_cls_no_desc(cls, fmt, func, ...) \
+ __dynamic_func_call_cls_no_desc(__UNIQUE_ID(ddebug), cls, fmt, \
+ func, ##__VA_ARGS__)
+#define _dynamic_func_call_no_desc(fmt, func, ...) \
+ _dynamic_func_call_cls_no_desc(_DPRINTK_CLASS_DFLT, fmt, \
+ func, ##__VA_ARGS__)
+
+#define dynamic_pr_debug_cls(cls, fmt, ...) \
+ _dynamic_func_call_cls(cls, fmt, __dynamic_pr_debug, \
+ pr_fmt(fmt), ##__VA_ARGS__)
#define dynamic_pr_debug(fmt, ...) \
- _dynamic_func_call(fmt, __dynamic_pr_debug, \
+ _dynamic_func_call(fmt, __dynamic_pr_debug, \
pr_fmt(fmt), ##__VA_ARGS__)
#define dynamic_dev_dbg(dev, fmt, ...) \
- _dynamic_func_call(fmt,__dynamic_dev_dbg, \
+ _dynamic_func_call(fmt, __dynamic_dev_dbg, \
dev, fmt, ##__VA_ARGS__)
#define dynamic_netdev_dbg(dev, fmt, ...) \
@@ -181,14 +287,24 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
KERN_DEBUG, prefix_str, prefix_type, \
rowsize, groupsize, buf, len, ascii)
+struct kernel_param;
+int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp);
+int param_get_dyndbg_classes(char *buffer, const struct kernel_param *kp);
+
+/* for test only, generally expect drm.debug style macro wrappers */
+#define __pr_debug_cls(cls, fmt, ...) do { \
+ BUILD_BUG_ON_MSG(!__builtin_constant_p(cls), \
+ "expecting constant class int/enum"); \
+ dynamic_pr_debug_cls(cls, fmt, ##__VA_ARGS__); \
+ } while (0)
+
#else /* !CONFIG_DYNAMIC_DEBUG_CORE */
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/printk.h>
-static inline int ddebug_add_module(struct _ddebug *tab, unsigned int n,
- const char *modname)
+static inline int ddebug_add_module(struct _ddebug_info *dinfo, const char *modname)
{
return 0;
}
@@ -201,7 +317,7 @@ static inline int ddebug_remove_module(const char *mod)
static inline int ddebug_dyndbg_module_param_cb(char *param, char *val,
const char *modname)
{
- if (strstr(param, "dyndbg")) {
+ if (!strcmp(param, "dyndbg")) {
/* avoid pr_warn(), which wants pr_fmt() fully defined */
printk(KERN_WARNING "dyndbg param is supported only in "
"CONFIG_DYNAMIC_DEBUG builds\n");
@@ -221,12 +337,14 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val,
rowsize, groupsize, buf, len, ascii); \
} while (0)
-static inline int dynamic_debug_exec_queries(const char *query, const char *modname)
-{
- pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n");
- return 0;
-}
+struct kernel_param;
+static inline int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp)
+{ return 0; }
+static inline int param_get_dyndbg_classes(char *buffer, const struct kernel_param *kp)
+{ return 0; }
#endif /* !CONFIG_DYNAMIC_DEBUG_CORE */
+extern const struct kernel_param_ops param_ops_dyndbg_classes;
+
#endif
diff --git a/include/linux/edac.h b/include/linux/edac.h
index e730b3468719..fa4bda2a70f6 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -231,21 +231,21 @@ enum mem_type {
#define MEM_FLAG_DDR BIT(MEM_DDR)
#define MEM_FLAG_RDDR BIT(MEM_RDDR)
#define MEM_FLAG_RMBS BIT(MEM_RMBS)
-#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
-#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
-#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
-#define MEM_FLAG_XDR BIT(MEM_XDR)
-#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
-#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
-#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3)
-#define MEM_FLAG_DDR4 BIT(MEM_DDR4)
-#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4)
-#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4)
-#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4)
-#define MEM_FLAG_DDR5 BIT(MEM_DDR5)
-#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5)
-#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5)
-#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM)
+#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
+#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
+#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
+#define MEM_FLAG_XDR BIT(MEM_XDR)
+#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
+#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
+#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3)
+#define MEM_FLAG_DDR4 BIT(MEM_DDR4)
+#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4)
+#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4)
+#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4)
+#define MEM_FLAG_DDR5 BIT(MEM_DDR5)
+#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5)
+#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5)
+#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM)
#define MEM_FLAG_WIO2 BIT(MEM_WIO2)
#define MEM_FLAG_HBM2 BIT(MEM_HBM2)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index d2b84c2fec39..4b27519143f5 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -368,6 +368,10 @@ void efi_native_runtime_setup(void);
#define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93)
#define LINUX_EFI_CRASH_GUID EFI_GUID(0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0)
#define LOADED_IMAGE_PROTOCOL_GUID EFI_GUID(0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
+#define LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID EFI_GUID(0xbc62157e, 0x3e33, 0x4fec, 0x99, 0x20, 0x2d, 0x3b, 0x36, 0xd7, 0x50, 0xdf)
+#define EFI_DEVICE_PATH_PROTOCOL_GUID EFI_GUID(0x09576e91, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
+#define EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID EFI_GUID(0x8b843e20, 0x8132, 0x4852, 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c)
+#define EFI_DEVICE_PATH_FROM_TEXT_PROTOCOL_GUID EFI_GUID(0x05c99a21, 0xc70f, 0x4ad2, 0x8a, 0x5f, 0x35, 0xdf, 0x33, 0x43, 0xf5, 0x1e)
#define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a)
#define EFI_UGA_PROTOCOL_GUID EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39)
#define EFI_PCI_IO_PROTOCOL_GUID EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a)
@@ -386,6 +390,7 @@ void efi_native_runtime_setup(void);
#define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d)
#define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
#define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9)
+#define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7)
#define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
#define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
@@ -400,7 +405,7 @@ void efi_native_runtime_setup(void);
* structure that was populated by the stub based on the GOP protocol instance
* associated with ConOut
*/
-#define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95)
+#define LINUX_EFI_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95)
#define LINUX_EFI_ARM_CPU_STATE_TABLE_GUID EFI_GUID(0xef79e4aa, 0x3c3d, 0x4989, 0xb9, 0x02, 0x07, 0xa9, 0x43, 0xe5, 0x50, 0xd2)
#define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
#define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b)
@@ -410,6 +415,7 @@ void efi_native_runtime_setup(void);
#define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68)
#define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89)
#define LINUX_EFI_COCO_SECRET_AREA_GUID EFI_GUID(0xadf956ad, 0xe98c, 0x484c, 0xae, 0x11, 0xb5, 0x1c, 0x7d, 0x33, 0x64, 0x47)
+#define LINUX_EFI_BOOT_MEMMAP_GUID EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4)
#define RISCV_EFI_BOOT_PROTOCOL_GUID EFI_GUID(0xccd15fec, 0x6f73, 0x4eec, 0x83, 0x95, 0x3e, 0x69, 0xe4, 0xb9, 0x40, 0xbf)
@@ -518,6 +524,15 @@ typedef union {
efi_system_table_32_t mixed_mode;
} efi_system_table_t;
+struct efi_boot_memmap {
+ unsigned long map_size;
+ unsigned long desc_size;
+ u32 desc_ver;
+ unsigned long map_key;
+ unsigned long buff_size;
+ efi_memory_desc_t map[];
+};
+
/*
* Architecture independent structure for describing a memory map for the
* benefit of efi_memmap_init_early(), and for passing context between
@@ -692,18 +707,10 @@ static inline efi_status_t efi_query_variable_store(u32 attributes,
#endif
extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
-extern int __init efi_memmap_alloc(unsigned int num_entries,
- struct efi_memory_map_data *data);
-extern void __efi_memmap_free(u64 phys, unsigned long size,
- unsigned long flags);
+extern int __init __efi_memmap_init(struct efi_memory_map_data *data);
extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size);
extern void __init efi_memmap_unmap(void);
-extern int __init efi_memmap_install(struct efi_memory_map_data *data);
-extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
- struct range *range);
-extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
- void *buf, struct efi_mem_range *mem);
#ifdef CONFIG_EFI_ESRT
extern void __init efi_esrt_init(void);
@@ -734,12 +741,6 @@ extern struct kobject *efi_kobj;
extern int efi_reboot_quirk_mode;
extern bool efi_poweroff_required(void);
-#ifdef CONFIG_EFI_FAKE_MEMMAP
-extern void __init efi_fake_memmap(void);
-#else
-static inline void efi_fake_memmap(void) { }
-#endif
-
extern unsigned long efi_mem_attr_table;
/*
@@ -952,6 +953,7 @@ extern int efi_status_to_err(efi_status_t status);
#define EFI_DEV_MEDIA_VENDOR 3
#define EFI_DEV_MEDIA_FILE 4
#define EFI_DEV_MEDIA_PROTOCOL 5
+#define EFI_DEV_MEDIA_REL_OFFSET 8
#define EFI_DEV_BIOS_BOOT 0x05
#define EFI_DEV_END_PATH 0x7F
#define EFI_DEV_END_PATH2 0xFF
@@ -982,12 +984,32 @@ struct efi_vendor_dev_path {
u8 vendordata[];
} __packed;
+struct efi_rel_offset_dev_path {
+ struct efi_generic_dev_path header;
+ u32 reserved;
+ u64 starting_offset;
+ u64 ending_offset;
+} __packed;
+
+struct efi_mem_mapped_dev_path {
+ struct efi_generic_dev_path header;
+ u32 memory_type;
+ u64 starting_addr;
+ u64 ending_addr;
+} __packed;
+
+struct efi_file_path_dev_path {
+ struct efi_generic_dev_path header;
+ efi_char16_t filename[];
+} __packed;
+
struct efi_dev_path {
union {
struct efi_generic_dev_path header;
struct efi_acpi_dev_path acpi;
struct efi_pci_dev_path pci;
struct efi_vendor_dev_path vendor;
+ struct efi_rel_offset_dev_path rel_offset;
};
} __packed;
@@ -1055,9 +1077,6 @@ efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor,
efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size, void *data);
-efi_status_t check_var_size(u32 attributes, unsigned long size);
-efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size);
-
#if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER)
extern bool efi_capsule_pending(int *reset_type);
@@ -1070,34 +1089,6 @@ extern int efi_capsule_update(efi_capsule_header_t *capsule,
static inline bool efi_capsule_pending(int *reset_type) { return false; }
#endif
-#ifdef CONFIG_EFI_RUNTIME_MAP
-int efi_runtime_map_init(struct kobject *);
-int efi_get_runtime_map_size(void);
-int efi_get_runtime_map_desc_size(void);
-int efi_runtime_map_copy(void *buf, size_t bufsz);
-#else
-static inline int efi_runtime_map_init(struct kobject *kobj)
-{
- return 0;
-}
-
-static inline int efi_get_runtime_map_size(void)
-{
- return 0;
-}
-
-static inline int efi_get_runtime_map_desc_size(void)
-{
- return 0;
-}
-
-static inline int efi_runtime_map_copy(void *buf, size_t bufsz)
-{
- return 0;
-}
-
-#endif
-
#ifdef CONFIG_EFI
extern bool efi_runtime_disabled(void);
#else
@@ -1142,8 +1133,6 @@ void efi_check_for_embedded_firmwares(void);
static inline void efi_check_for_embedded_firmwares(void) { }
#endif
-efi_status_t efi_random_get_seed(void);
-
#define arch_efi_call_virt(p, f, args...) ((p)->f(args))
/*
@@ -1195,7 +1184,7 @@ efi_status_t efi_random_get_seed(void);
arch_efi_call_virt_teardown(); \
})
-#define EFI_RANDOM_SEED_SIZE 64U
+#define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE
struct linux_efi_random_seed {
u32 size;
@@ -1321,6 +1310,11 @@ struct linux_efi_coco_secret_area {
u64 size;
};
+struct linux_efi_initrd {
+ unsigned long base;
+ unsigned long size;
+};
+
/* Header of a populated EFI secret area */
#define EFI_SECRET_TABLE_HEADER_GUID EFI_GUID(0x1e74f542, 0x71dd, 0x4d66, 0x96, 0x3e, 0xef, 0x42, 0x87, 0xff, 0x17, 0x3b)
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 346a8b56cdc8..9ec81290e3c8 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -88,22 +88,13 @@ static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t*
{
#if defined (ELF_CORE_COPY_TASK_REGS)
return ELF_CORE_COPY_TASK_REGS(t, elfregs);
-#elif defined (task_pt_regs)
+#else
elf_core_copy_regs(elfregs, task_pt_regs(t));
#endif
return 0;
}
-extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
-
-static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_regs *regs, elf_fpregset_t *fpu)
-{
-#ifdef ELF_CORE_COPY_FPREGS
- return ELF_CORE_COPY_FPREGS(t, fpu);
-#else
- return dump_fpu(regs, fpu);
-#endif
-}
+int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS
/*
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 84a466b176cf..d95ab85f96ba 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -253,7 +253,6 @@ static __always_inline void arch_exit_to_user_mode(void) { }
/**
* arch_do_signal_or_restart - Architecture specific signal delivery function
* @regs: Pointer to currents pt_regs
- * @has_signal: actual signal to handle
*
* Invoked from exit_to_user_mode_loop().
*/
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 92b10e67d5f8..a541f0c4f146 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -428,6 +428,28 @@ static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2,
return true;
}
+static inline bool ether_addr_is_ipv4_mcast(const u8 *addr)
+{
+ u8 base[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
+ u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 };
+
+ return ether_addr_equal_masked(addr, base, mask);
+}
+
+static inline bool ether_addr_is_ipv6_mcast(const u8 *addr)
+{
+ u8 base[ETH_ALEN] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 };
+ u8 mask[ETH_ALEN] = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 };
+
+ return ether_addr_equal_masked(addr, base, mask);
+}
+
+static inline bool ether_addr_is_ip_mcast(const u8 *addr)
+{
+ return ether_addr_is_ipv4_mcast(addr) ||
+ ether_addr_is_ipv6_mcast(addr);
+}
+
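The masks above match the standard multicast MAC prefixes: 01:00:5e:00:00:00/25 for IPv4 (the RFC 1112 mapping of 224.0.0.0/4) and 33:33:00:00:00:00/16 for IPv6. A hedged driver-side sketch:

	/* hypothetical RX path: steer IP multicast frames to a dedicated queue */
	static bool frame_is_ip_mcast(const struct sk_buff *skb)
	{
		const struct ethhdr *eth = eth_hdr(skb);

		return ether_addr_is_ip_mcast(eth->h_dest);
	}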
/**
* ether_addr_to_u64 - Convert an Ethernet address into a u64 value.
* @addr: Pointer to a six-byte array containing the Ethernet address
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 99dc7bfbcd3c..9e0a76fc7de9 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -125,6 +125,20 @@ struct ethtool_link_ext_state_info {
};
};
+struct ethtool_link_ext_stats {
+ /* Custom Linux statistic for PHY level link down events.
+ * In a simpler world it would be equal to netdev->carrier_down_count;
+ * unfortunately, netdev also counts local reconfigurations which don't
+ * actually take the physical link down, not to mention NC-SI which,
+ * if present, keeps the link up regardless of host state.
+ * This statistic counts when the PHY _actually_ went down, or lost link.
+ *
+ * Note that we need u64 for ethtool_stats_init() and comparisons
+ * to ETHTOOL_STAT_NOT_SET, but only u32 is exposed to the user.
+ */
+ u64 link_down_events;
+};
+
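A hedged sketch of a driver reporting the counter through the new callback (driver fields are hypothetical):

	static void my_get_link_ext_stats(struct net_device *dev,
					  struct ethtool_link_ext_stats *stats)
	{
		struct my_priv *priv = netdev_priv(dev);

		/* incremented from the PHY link-change interrupt handler */
		stats->link_down_events = priv->link_down_events;
	}

	static const struct ethtool_ops my_ethtool_ops = {
		.get_link_ext_stats	= my_get_link_ext_stats,
		/* ... */
	};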
/**
* ethtool_rxfh_indir_default - get default value for RX flow hash indirection
* @index: Index in RX flow hash indirection table
@@ -459,10 +473,10 @@ struct ethtool_module_power_mode_params {
* parameter.
* @supported_coalesce_params: supported types of interrupt coalescing.
* @supported_ring_params: supported ring params.
- * @get_drvinfo: Report driver/device information. Should only set the
- * @driver, @version, @fw_version and @bus_info fields. If not
- * implemented, the @driver and @bus_info fields will be filled in
- * according to the netdev's parent device.
+ * @get_drvinfo: Report driver/device information. Modern drivers no
+ * longer have to implement this callback. Most fields are
+ * correctly filled in by the core using system information, or
+ * populated using other driver operations.
* @get_regs_len: Get buffer length required for @get_regs
* @get_regs: Get device registers
* @get_wol: Report whether Wake-on-Lan is enabled
@@ -481,6 +495,7 @@ struct ethtool_module_power_mode_params {
* do not attach ext_substate attribute to netlink message). If link_ext_state
* and link_ext_substate are unknown, return -ENODATA. If not implemented,
* link_ext_state and link_ext_substate will not be sent to userspace.
+ * @get_link_ext_stats: Read extra link-related counters.
* @get_eeprom_len: Read range of EEPROM addresses for validation of
* @get_eeprom and @set_eeprom requests.
* Returns 0 if device does not support EEPROM access.
@@ -652,6 +667,8 @@ struct ethtool_ops {
u32 (*get_link)(struct net_device *);
int (*get_link_ext_state)(struct net_device *,
struct ethtool_link_ext_state_info *);
+ void (*get_link_ext_stats)(struct net_device *dev,
+ struct ethtool_link_ext_stats *stats);
int (*get_eeprom_len)(struct net_device *);
int (*get_eeprom)(struct net_device *,
struct ethtool_eeprom *, u8 *);
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 305d5f19093b..36a486505b08 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -40,13 +40,14 @@ struct file *eventfd_fget(int fd);
struct eventfd_ctx *eventfd_ctx_fdget(int fd);
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
+__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask);
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
static inline bool eventfd_signal_allowed(void)
{
- return !current->in_eventfd_signal;
+ return !current->in_eventfd;
}
#else /* CONFIG_EVENTFD */
@@ -61,7 +62,13 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd)
return ERR_PTR(-ENOSYS);
}
-static inline int eventfd_signal(struct eventfd_ctx *ctx, int n)
+static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+{
+ return -ENOSYS;
+}
+
+static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n,
+ unsigned mask)
{
return -ENOSYS;
}
diff --git a/include/linux/evm.h b/include/linux/evm.h
index aa63e0b3c0a2..7a9ee2157f69 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -35,6 +35,27 @@ extern int evm_inode_removexattr(struct user_namespace *mnt_userns,
struct dentry *dentry, const char *xattr_name);
extern void evm_inode_post_removexattr(struct dentry *dentry,
const char *xattr_name);
+static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ evm_inode_post_removexattr(dentry, acl_name);
+}
+extern int evm_inode_set_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl);
+static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ return evm_inode_set_acl(mnt_userns, dentry, acl_name, NULL);
+}
+static inline void evm_inode_post_set_acl(struct dentry *dentry,
+ const char *acl_name,
+ struct posix_acl *kacl)
+{
+ return evm_inode_post_setxattr(dentry, acl_name, NULL, 0);
+}
extern int evm_inode_init_security(struct inode *inode,
const struct xattr *xattr_array,
struct xattr *evm);
@@ -108,6 +129,34 @@ static inline void evm_inode_post_removexattr(struct dentry *dentry,
return;
}
+static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ return;
+}
+
+static inline int evm_inode_set_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl)
+{
+ return 0;
+}
+
+static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ return 0;
+}
+
+static inline void evm_inode_post_set_acl(struct dentry *dentry,
+ const char *acl_name,
+ struct posix_acl *kacl)
+{
+ return;
+}
+
static inline int evm_inode_init_security(struct inode *inode,
const struct xattr *xattr_array,
struct xattr *evm)
diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h
index c2b1d4fd5987..fe7e6ba918f1 100644
--- a/include/linux/export-internal.h
+++ b/include/linux/export-internal.h
@@ -10,8 +10,10 @@
#include <linux/compiler.h>
#include <linux/types.h>
-/* __used is needed to keep __crc_* for LTO */
#define SYMBOL_CRC(sym, crc, sec) \
- u32 __section("___kcrctab" sec "+" #sym) __used __crc_##sym = crc
+ asm(".section \"___kcrctab" sec "+" #sym "\",\"a\"" "\n" \
+ "__crc_" #sym ":" "\n" \
+ ".long " #crc "\n" \
+ ".previous" "\n")
#endif /* __LINUX_EXPORT_INTERNAL_H__ */
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index d445150c5350..ee0d75d9a302 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -73,6 +73,42 @@ struct f2fs_device {
__le32 total_segments;
} __packed;
+/* reasons for stop_checkpoint */
+enum stop_cp_reason {
+ STOP_CP_REASON_SHUTDOWN,
+ STOP_CP_REASON_FAULT_INJECT,
+ STOP_CP_REASON_META_PAGE,
+ STOP_CP_REASON_WRITE_FAIL,
+ STOP_CP_REASON_CORRUPTED_SUMMARY,
+ STOP_CP_REASON_UPDATE_INODE,
+ STOP_CP_REASON_FLUSH_FAIL,
+ STOP_CP_REASON_MAX,
+};
+
+#define MAX_STOP_REASON 32
+
+/* detailed reasons for EFSCORRUPTED */
+enum f2fs_error {
+ ERROR_CORRUPTED_CLUSTER,
+ ERROR_FAIL_DECOMPRESSION,
+ ERROR_INVALID_BLKADDR,
+ ERROR_CORRUPTED_DIRENT,
+ ERROR_CORRUPTED_INODE,
+ ERROR_INCONSISTENT_SUMMARY,
+ ERROR_INCONSISTENT_FOOTER,
+ ERROR_INCONSISTENT_SUM_TYPE,
+ ERROR_CORRUPTED_JOURNAL,
+ ERROR_INCONSISTENT_NODE_COUNT,
+ ERROR_INCONSISTENT_BLOCK_COUNT,
+ ERROR_INVALID_CURSEG,
+ ERROR_INCONSISTENT_SIT,
+ ERROR_CORRUPTED_VERITY_XATTR,
+ ERROR_CORRUPTED_XATTR,
+ ERROR_MAX,
+};
+
+#define MAX_F2FS_ERRORS 16
+
struct f2fs_super_block {
__le32 magic; /* Magic Number */
__le16 major_ver; /* Major Version */
@@ -116,7 +152,9 @@ struct f2fs_super_block {
__u8 hot_ext_count; /* # of hot file extension */
__le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */
- __u8 reserved[306]; /* valid reserved region */
+ __u8 s_stop_reason[MAX_STOP_REASON]; /* stop checkpoint reason */
+ __u8 s_errors[MAX_F2FS_ERRORS]; /* reasons for image corruption */
+ __u8 reserved[258]; /* valid reserved region */
__le32 crc; /* checksum of superblock */
} __packed;
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 9f6e25467844..444236dadcf0 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,7 +20,6 @@ struct fault_attr {
atomic_t space;
unsigned long verbose;
bool task_filter;
- bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@@ -32,6 +31,10 @@ struct fault_attr {
struct dentry *dname;
};
+enum fault_flags {
+ FAULT_NOWARN = 1 << 0,
+};
+
#define FAULT_ATTR_INITIALIZER { \
.interval = 1, \
.times = ATOMIC_INIT(1), \
@@ -40,11 +43,11 @@ struct fault_attr {
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
- .no_warn = false, \
}
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
int setup_fault_attr(struct fault_attr *attr, char *str);
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
bool should_fail(struct fault_attr *attr, ssize_t size);
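Callers that used to set attr->no_warn can now suppress the warning per call instead, roughly (a hedged sketch, attr name hypothetical):

	/* fail this one attempt silently */
	if (should_fail_ex(&my_fail_attr, size, FAULT_NOWARN))
		return NULL;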
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 07fcd0e56682..96b96323e9cb 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -555,7 +555,7 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || \
defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || \
- defined(__arm__) || defined(__aarch64__)
+ defined(__arm__) || defined(__aarch64__) || defined(__mips__)
#define fb_readb __raw_readb
#define fb_readw __raw_readw
@@ -615,10 +615,6 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
/* drivers/video/fbmem.c */
extern int register_framebuffer(struct fb_info *fb_info);
extern void unregister_framebuffer(struct fb_info *fb_info);
-extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
- const char *name);
-extern int remove_conflicting_framebuffers(struct apertures_struct *a,
- const char *name, bool primary);
extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
extern int fb_show_logo(struct fb_info *fb_info, int rotate);
extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
@@ -631,16 +627,10 @@ extern int fb_get_color_depth(struct fb_var_screeninfo *var,
extern int fb_get_options(const char *name, char **option);
extern int fb_new_modelist(struct fb_info *info);
-extern struct fb_info *registered_fb[FB_MAX];
-extern int num_registered_fb;
extern bool fb_center_logo;
extern int fb_logo_count;
extern struct class *fb_class;
-#define for_each_registered_fb(i) \
- for (i = 0; i < FB_MAX; i++) \
- if (!registered_fb[i]) {} else
-
static inline void lock_fb_info(struct fb_info *info)
{
mutex_lock(&info->lock);
@@ -813,6 +803,15 @@ extern int fb_find_mode(struct fb_var_screeninfo *var,
const struct fb_videomode *default_mode,
unsigned int default_bpp);
+#if defined(CONFIG_VIDEO_NOMODESET)
+bool fb_modesetting_disabled(const char *drvname);
+#else
+static inline bool fb_modesetting_disabled(const char *drvname)
+{
+ return false;
+}
+#endif
+
/* Convenience logging macros */
#define fb_err(fb_info, fmt, ...) \
pr_err("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a5f21dc3c432..bf701976056e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -567,6 +567,12 @@ struct sk_filter {
DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
+extern struct mutex nf_conn_btf_access_lock;
+extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag);
+
typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
const struct bpf_insn *insnsi,
unsigned int (*bpf_func)(const void *,
@@ -637,13 +643,13 @@ struct bpf_nh_params {
};
struct bpf_redirect_info {
- u32 flags;
- u32 tgt_index;
+ u64 tgt_index;
void *tgt_value;
struct bpf_map *map;
+ u32 flags;
+ u32 kern_flags;
u32 map_id;
enum bpf_map_type map_type;
- u32 kern_flags;
struct bpf_nh_params nh;
};
@@ -900,8 +906,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
void sk_reuseport_prog_free(struct bpf_prog *prog);
int sk_detach_filter(struct sock *sk);
-int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
- unsigned int len);
+int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len);
bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
@@ -1018,6 +1023,8 @@ extern long bpf_jit_limit_max;
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
+void bpf_jit_fill_hole_with_zero(void *area, unsigned int size);
+
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
unsigned int alignment,
@@ -1030,6 +1037,9 @@ void bpf_jit_free(struct bpf_prog *fp);
struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp);
+void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns);
+void bpf_prog_pack_free(struct bpf_binary_header *hdr);
+
static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
{
return list_empty(&fp->aux->ksym.lnode) ||
@@ -1100,7 +1110,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
return false;
if (!bpf_jit_harden)
return false;
- if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN))
+ if (bpf_jit_harden == 1 && bpf_capable())
return false;
return true;
@@ -1494,7 +1504,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index,
u64 flags, const u64 flag_mask,
void *lookup_elem(struct bpf_map *map, u32 key))
{
@@ -1505,7 +1515,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
if (unlikely(flags & ~(action_mask | flag_mask)))
return XDP_ABORTED;
- ri->tgt_value = lookup_elem(map, ifindex);
+ ri->tgt_value = lookup_elem(map, index);
if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
/* If the lookup fails we want to clear out the state in the
* redirect_info struct completely, so that if an eBPF program
@@ -1517,7 +1527,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
return flags & action_mask;
}
- ri->tgt_index = ifindex;
+ ri->tgt_index = index;
ri->map_id = map->id;
ri->map_type = map->map_type;
diff --git a/include/linux/find.h b/include/linux/find.h
index 424ef67d4a42..ccaf61a0f5fd 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -8,15 +8,33 @@
#include <linux/bitops.h>
-extern unsigned long _find_next_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long nbits,
- unsigned long start, unsigned long invert, unsigned long le);
+unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits,
+ unsigned long start);
+unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start);
+unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start);
+unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
+ unsigned long start);
extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n);
+unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long size, unsigned long n);
+unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long size, unsigned long n);
extern unsigned long _find_first_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size);
extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
+#ifdef __BIG_ENDIAN
+unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size);
+unsigned long _find_next_zero_bit_le(const unsigned long *addr, unsigned
+ long size, unsigned long offset);
+unsigned long _find_next_bit_le(const unsigned long *addr, unsigned
+ long size, unsigned long offset);
+#endif
+
#ifndef find_next_bit
/**
* find_next_bit - find the next set bit in a memory region
@@ -41,7 +59,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
return val ? __ffs(val) : size;
}
- return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
+ return _find_next_bit(addr, size, offset);
}
#endif
@@ -71,7 +89,38 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
return val ? __ffs(val) : size;
}
- return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
+ return _find_next_and_bit(addr1, addr2, size, offset);
+}
+#endif
+
+#ifndef find_next_andnot_bit
+/**
+ * find_next_andnot_bit - find the next set bit in *addr1 excluding all the bits
+ * in *addr2
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_andnot_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr1 & ~*addr2 & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_andnot_bit(addr1, addr2, size, offset);
}
#endif
@@ -99,7 +148,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
return val == ~0UL ? size : ffz(val);
}
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
+ return _find_next_zero_bit(addr, size, offset);
}
#endif
@@ -125,6 +174,87 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
}
#endif
+/**
+ * find_nth_bit - find N'th set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ * @n: The ordinal number of the set bit whose position is needed, counting from 0
+ *
+ * The following is semantically equivalent:
+ * idx = find_nth_bit(addr, size, 0);
+ * idx = find_first_bit(addr, size);
+ *
+ * Returns the bit number of the N'th set bit.
+ * If there is no such bit, returns @size.
+ */
+static inline
+unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n)
+{
+ if (n >= size)
+ return size;
+
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? fns(val, n) : size;
+ }
+
+ return __find_nth_bit(addr, size, n);
+}
+
+/**
+ * find_nth_and_bit - find N'th set bit in 2 memory regions
+ * @addr1: The 1st address to start the search at
+ * @addr2: The 2nd address to start the search at
+ * @size: The maximum number of bits to search
+ * @n: The ordinal number of the set bit whose position is needed, counting from 0
+ *
+ * Returns the bit number of the N'th set bit.
+ * If there is no such bit, returns @size.
+ */
+static inline
+unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long size, unsigned long n)
+{
+ if (n >= size)
+ return size;
+
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
+
+ return val ? fns(val, n) : size;
+ }
+
+ return __find_nth_and_bit(addr1, addr2, size, n);
+}
+
+/**
+ * find_nth_andnot_bit - find N'th set bit in 2 memory regions,
+ * flipping bits in 2nd region
+ * @addr1: The 1st address to start the search at
+ * @addr2: The 2nd address to start the search at
+ * @size: The maximum number of bits to search
+ * @n: The ordinal number of the set bit whose position is needed, counting from 0
+ *
+ * Returns the bit number of the N'th set bit.
+ * If there is no such bit, returns @size.
+ */
+static inline
+unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long size, unsigned long n)
+{
+ if (n >= size)
+ return size;
+
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0);
+
+ return val ? fns(val, n) : size;
+ }
+
+ return __find_nth_andnot_bit(addr1, addr2, size, n);
+}
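A quick illustration of the 0-based counting used by the N'th-bit helpers above (a hedged sketch):

	DECLARE_BITMAP(map, 16);
	unsigned long n;

	bitmap_zero(map, 16);
	__set_bit(3, map);
	__set_bit(7, map);
	__set_bit(12, map);

	n = find_nth_bit(map, 16, 0);	/* n == 3  */
	n = find_nth_bit(map, 16, 1);	/* n == 7  */
	n = find_nth_bit(map, 16, 2);	/* n == 12 */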
+
#ifndef find_first_and_bit
/**
* find_first_and_bit - find the first set bit in both memory regions
@@ -194,6 +324,78 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
#endif
/**
+ * find_next_and_bit_wrap - find the next set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * Returns the bit number for the next set bit, or the first set bit up to @offset.
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size, unsigned long offset)
+{
+ unsigned long bit = find_next_and_bit(addr1, addr2, size, offset);
+
+ if (bit < size)
+ return bit;
+
+ bit = find_first_and_bit(addr1, addr2, offset);
+ return bit < offset ? bit : size;
+}
+
+/**
+ * find_next_bit_wrap - find the next set bit in a memory region, wrapping at the end
+ * @addr: The first address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * Returns the bit number for the next set bit, or the first set bit up to @offset.
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_bit_wrap(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ unsigned long bit = find_next_bit(addr, size, offset);
+
+ if (bit < size)
+ return bit;
+
+ bit = find_first_bit(addr, offset);
+ return bit < offset ? bit : size;
+}
+
+/*
+ * Helper for for_each_set_bit_wrap(). Make sure you're doing the right thing
+ * before using it alone.
+ */
+static inline
+unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size,
+ unsigned long start, unsigned long n)
+{
+ unsigned long bit;
+
+ /* If not wrapped around */
+ if (n > start) {
+ /* and have a bit, just return it. */
+ bit = find_next_bit(bitmap, size, n);
+ if (bit < size)
+ return bit;
+
+ /* Otherwise, wrap around and ... */
+ n = 0;
+ }
+
+ /* Search the other part. */
+ bit = find_next_bit(bitmap, start, n);
+ return bit < start ? bit : size;
+}
+
+/**
* find_next_clump8 - find next 8-bit clump with set bits in a memory region
* @clump: location to store copy of found clump
* @addr: address to base the search on
@@ -247,7 +449,21 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned
return val == ~0UL ? size : ffz(val);
}
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+ return _find_next_zero_bit_le(addr, size, offset);
+}
+#endif
+
+#ifndef find_first_zero_bit_le
+static inline
+unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = swab(*(const unsigned long *)addr) | ~GENMASK(size - 1, 0);
+
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_first_zero_bit_le(addr, size);
}
#endif
@@ -266,40 +482,39 @@ unsigned long find_next_bit_le(const void *addr, unsigned
return val ? __ffs(val) : size;
}
- return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
+ return _find_next_bit_le(addr, size, offset);
}
#endif
-#ifndef find_first_zero_bit_le
-#define find_first_zero_bit_le(addr, size) \
- find_next_zero_bit_le((addr), (size), 0)
-#endif
-
#else
#error "Please fix <asm/byteorder.h>"
#endif
#define for_each_set_bit(bit, addr, size) \
- for ((bit) = find_next_bit((addr), (size), 0); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
+ for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
+
+#define for_each_and_bit(bit, addr1, addr2, size) \
+ for ((bit) = 0; \
+ (bit) = find_next_and_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
+ (bit)++)
+
+#define for_each_andnot_bit(bit, addr1, addr2, size) \
+ for ((bit) = 0; \
+ (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
+ (bit)++)
/* same as for_each_set_bit() but use bit as value to start with */
#define for_each_set_bit_from(bit, addr, size) \
- for ((bit) = find_next_bit((addr), (size), (bit)); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
+ for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
#define for_each_clear_bit(bit, addr, size) \
- for ((bit) = find_next_zero_bit((addr), (size), 0); \
- (bit) < (size); \
- (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+ for ((bit) = 0; \
+ (bit) = find_next_zero_bit((addr), (size), (bit)), (bit) < (size); \
+ (bit)++)
/* same as for_each_clear_bit() but use bit as value to start with */
#define for_each_clear_bit_from(bit, addr, size) \
- for ((bit) = find_next_zero_bit((addr), (size), (bit)); \
- (bit) < (size); \
- (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+ for (; (bit) = find_next_zero_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
/**
* for_each_set_bitrange - iterate over all set bit ranges [b; e)
@@ -309,11 +524,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned
* @size: bitmap size in number of bits
*/
#define for_each_set_bitrange(b, e, addr, size) \
- for ((b) = find_next_bit((addr), (size), 0), \
- (e) = find_next_zero_bit((addr), (size), (b) + 1); \
+ for ((b) = 0; \
+ (b) = find_next_bit((addr), (size), b), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1), \
(b) < (size); \
- (b) = find_next_bit((addr), (size), (e) + 1), \
- (e) = find_next_zero_bit((addr), (size), (b) + 1))
+ (b) = (e) + 1)
/**
* for_each_set_bitrange_from - iterate over all set bit ranges [b; e)
@@ -323,11 +538,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned
* @size: bitmap size in number of bits
*/
#define for_each_set_bitrange_from(b, e, addr, size) \
- for ((b) = find_next_bit((addr), (size), (b)), \
- (e) = find_next_zero_bit((addr), (size), (b) + 1); \
+ for (; \
+ (b) = find_next_bit((addr), (size), (b)), \
+ (e) = find_next_zero_bit((addr), (size), (b) + 1), \
(b) < (size); \
- (b) = find_next_bit((addr), (size), (e) + 1), \
- (e) = find_next_zero_bit((addr), (size), (b) + 1))
+ (b) = (e) + 1)
/**
* for_each_clear_bitrange - iterate over all unset bit ranges [b; e)
@@ -337,11 +552,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned
* @size: bitmap size in number of bits
*/
#define for_each_clear_bitrange(b, e, addr, size) \
- for ((b) = find_next_zero_bit((addr), (size), 0), \
- (e) = find_next_bit((addr), (size), (b) + 1); \
+ for ((b) = 0; \
+ (b) = find_next_zero_bit((addr), (size), (b)), \
+ (e) = find_next_bit((addr), (size), (b) + 1), \
(b) < (size); \
- (b) = find_next_zero_bit((addr), (size), (e) + 1), \
- (e) = find_next_bit((addr), (size), (b) + 1))
+ (b) = (e) + 1)
/**
* for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e)
@@ -351,11 +566,24 @@ unsigned long find_next_bit_le(const void *addr, unsigned
* @size: bitmap size in number of bits
*/
#define for_each_clear_bitrange_from(b, e, addr, size) \
- for ((b) = find_next_zero_bit((addr), (size), (b)), \
- (e) = find_next_bit((addr), (size), (b) + 1); \
+ for (; \
+ (b) = find_next_zero_bit((addr), (size), (b)), \
+ (e) = find_next_bit((addr), (size), (b) + 1), \
(b) < (size); \
- (b) = find_next_zero_bit((addr), (size), (e) + 1), \
- (e) = find_next_bit((addr), (size), (b) + 1))
+ (b) = (e) + 1)
+
+/**
+ * for_each_set_bit_wrap - iterate over all set bits starting from @start, and
+ * wrapping around the end of bitmap.
+ * @bit: offset for current iteration
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ * @start: Starting bit for bitmap traversing, wrapping around the bitmap end
+ */
+#define for_each_set_bit_wrap(bit, addr, size, start) \
+ for ((bit) = find_next_bit_wrap((addr), (size), (start)); \
+ (bit) < (size); \
+ (bit) = __for_each_wrap((addr), (size), (start), (bit) + 1))
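A hedged sketch of the wrapped iteration, starting mid-bitmap:

	unsigned long bit;
	DECLARE_BITMAP(mask, 8);

	bitmap_fill(mask, 8);
	/* visits bits 5, 6, 7, 0, 1, 2, 3, 4 in that order */
	for_each_set_bit_wrap(bit, mask, 8, 5)
		pr_info("bit %lu\n", bit);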
/**
* for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 9f50dacbf7d6..fac37680ffe7 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -135,6 +135,7 @@ enum pm_ret_status {
};
enum pm_ioctl_id {
+ IOCTL_SET_TAPDELAY_BYPASS = 4,
IOCTL_SD_DLL_RESET = 6,
IOCTL_SET_SD_TAPDELAY = 7,
IOCTL_SET_PLL_FRAC_MODE = 8,
@@ -153,6 +154,9 @@ enum pm_ioctl_id {
/* Runtime feature configuration */
IOCTL_SET_FEATURE_CONFIG = 26,
IOCTL_GET_FEATURE_CONFIG = 27,
+ /* Dynamic SD/GEM configuration */
+ IOCTL_SET_SD_CONFIG = 30,
+ IOCTL_SET_GEM_CONFIG = 31,
};
enum pm_query_id {
@@ -386,6 +390,18 @@ enum zynqmp_pm_shutdown_subtype {
ZYNQMP_PM_SHUTDOWN_SUBTYPE_SYSTEM = 2,
};
+enum tap_delay_signal_type {
+ PM_TAPDELAY_NAND_DQS_IN = 0,
+ PM_TAPDELAY_NAND_DQS_OUT = 1,
+ PM_TAPDELAY_QSPI = 2,
+ PM_TAPDELAY_MAX = 3,
+};
+
+enum tap_delay_bypass_ctrl {
+ PM_TAPDELAY_BYPASS_DISABLE = 0,
+ PM_TAPDELAY_BYPASS_ENABLE = 1,
+};
+
enum ospi_mux_select_type {
PM_OSPI_MUX_SEL_DMA = 0,
PM_OSPI_MUX_SEL_LINEAR = 1,
@@ -400,6 +416,30 @@ enum pm_feature_config_id {
};
/**
+ * enum pm_sd_config_type - PM SD configuration.
+ * @SD_CONFIG_EMMC_SEL: To set SD_EMMC_SEL in CTRL_REG_SD and SD_SLOTTYPE
+ * @SD_CONFIG_BASECLK: To set SD_BASECLK in SD_CONFIG_REG1
+ * @SD_CONFIG_8BIT: To set SD_8BIT in SD_CONFIG_REG2
+ * @SD_CONFIG_FIXED: To set fixed config registers
+ */
+enum pm_sd_config_type {
+ SD_CONFIG_EMMC_SEL = 1,
+ SD_CONFIG_BASECLK = 2,
+ SD_CONFIG_8BIT = 3,
+ SD_CONFIG_FIXED = 4,
+};
+
+/**
+ * enum pm_gem_config_type - PM GEM configuration.
+ * @GEM_CONFIG_SGMII_MODE: To set GEM_SGMII_MODE in GEM_CLK_CTRL register
+ * @GEM_CONFIG_FIXED: To set fixed config registers
+ */
+enum pm_gem_config_type {
+ GEM_CONFIG_SGMII_MODE = 1,
+ GEM_CONFIG_FIXED = 2,
+};
+
+/**
* struct zynqmp_pm_query_data - PM query data
* @qid: query ID
* @arg1: Argument 1 of query data
@@ -457,6 +497,7 @@ int zynqmp_pm_write_ggs(u32 index, u32 value);
int zynqmp_pm_read_ggs(u32 index, u32 *value);
int zynqmp_pm_write_pggs(u32 index, u32 value);
int zynqmp_pm_read_pggs(u32 index, u32 *value);
+int zynqmp_pm_set_tapdelay_bypass(u32 index, u32 value);
int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype);
int zynqmp_pm_set_boot_health_status(u32 value);
int zynqmp_pm_pinctrl_request(const u32 pin);
@@ -475,6 +516,9 @@ int zynqmp_pm_is_function_supported(const u32 api_id, const u32 id);
int zynqmp_pm_set_feature_config(enum pm_feature_config_id id, u32 value);
int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, u32 *payload);
int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset);
+int zynqmp_pm_set_sd_config(u32 node, enum pm_sd_config_type config, u32 value);
+int zynqmp_pm_set_gem_config(u32 node, enum pm_gem_config_type config,
+ u32 value);
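A hypothetical caller sketch for the new SD configuration API; the node id and the values passed are placeholders for illustration, not constants defined by this header:

#include <linux/firmware/xlnx-zynqmp.h>

static int example_switch_to_emmc(u32 sd_node_id)
{
	int ret;

	/* Select eMMC mode on the controller, then latch the fixed registers. */
	ret = zynqmp_pm_set_sd_config(sd_node_id, SD_CONFIG_EMMC_SEL, 1);
	if (ret)
		return ret;

	return zynqmp_pm_set_sd_config(sd_node_id, SD_CONFIG_FIXED, 0);
}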
#else
static inline int zynqmp_pm_get_api_version(u32 *version)
{
@@ -666,6 +710,11 @@ static inline int zynqmp_pm_read_pggs(u32 index, u32 *value)
return -ENODEV;
}
+static inline int zynqmp_pm_set_tapdelay_bypass(u32 index, u32 value)
+{
+ return -ENODEV;
+}
+
static inline int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype)
{
return -ENODEV;
@@ -745,6 +794,21 @@ static inline int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset)
{
return -ENODEV;
}
+
+static inline int zynqmp_pm_set_sd_config(u32 node,
+ enum pm_sd_config_type config,
+ u32 value)
+{
+ return -ENODEV;
+}
+
+static inline int zynqmp_pm_set_gem_config(u32 node,
+ enum pm_gem_config_type config,
+ u32 value)
+{
+ return -ENODEV;
+}
+
#endif
#endif /* __FIRMWARE_ZYNQMP_H__ */
diff --git a/include/linux/fixp-arith.h b/include/linux/fixp-arith.h
index 281cb4f83dbe..e485fb0c1201 100644
--- a/include/linux/fixp-arith.h
+++ b/include/linux/fixp-arith.h
@@ -2,6 +2,7 @@
#ifndef _FIXP_ARITH_H
#define _FIXP_ARITH_H
+#include <linux/bug.h>
#include <linux/math64.h>
/*
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 3b401fa0f374..7cad8bb031e9 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -2,7 +2,9 @@
#ifndef _LINUX_FORTIFY_STRING_H_
#define _LINUX_FORTIFY_STRING_H_
+#include <linux/bug.h>
#include <linux/const.h>
+#include <linux/limits.h>
#define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable
#define __RENAME(x) __asm__(#x)
@@ -16,10 +18,11 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("
#define __compiletime_strlen(p) \
({ \
- unsigned char *__p = (unsigned char *)(p); \
- size_t __ret = (size_t)-1; \
- size_t __p_size = __builtin_object_size(p, 1); \
- if (__p_size != (size_t)-1) { \
+ char *__p = (char *)(p); \
+ size_t __ret = SIZE_MAX; \
+ size_t __p_size = __member_size(p); \
+ if (__p_size != SIZE_MAX && \
+ __builtin_constant_p(*__p)) { \
size_t __p_len = __p_size - 1; \
if (__builtin_constant_p(__p[__p_len]) && \
__p[__p_len] == '\0') \
@@ -40,11 +43,24 @@ extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
#else
-#define __underlying_memchr __builtin_memchr
-#define __underlying_memcmp __builtin_memcmp
+
+#if defined(__SANITIZE_MEMORY__)
+/*
+ * For KMSAN builds all memcpy/memset/memmove calls should be replaced by the
+ * corresponding __msan_XXX functions.
+ */
+#include <linux/kmsan_string.h>
+#define __underlying_memcpy __msan_memcpy
+#define __underlying_memmove __msan_memmove
+#define __underlying_memset __msan_memset
+#else
#define __underlying_memcpy __builtin_memcpy
#define __underlying_memmove __builtin_memmove
#define __underlying_memset __builtin_memset
+#endif
+
+#define __underlying_memchr __builtin_memchr
+#define __underlying_memcmp __builtin_memcmp
#define __underlying_strcat __builtin_strcat
#define __underlying_strcpy __builtin_strcpy
#define __underlying_strlen __builtin_strlen
@@ -69,32 +85,86 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size)
__underlying_memcpy(dst, src, bytes)
/*
- * Clang's use of __builtin_object_size() within inlines needs hinting via
- * __pass_object_size(). The preference is to only ever use type 1 (member
+ * Clang's use of __builtin_*object_size() within inlines needs hinting via
+ * __pass_*object_size(). The preference is to only ever use type 1 (member
* size, rather than struct size), but there remain some stragglers using
* type 0 that will be converted in the future.
*/
-#define POS __pass_object_size(1)
-#define POS0 __pass_object_size(0)
+#define POS __pass_object_size(1)
+#define POS0 __pass_object_size(0)
+#define __struct_size(p) __builtin_object_size(p, 0)
+#define __member_size(p) __builtin_object_size(p, 1)
+
+#define __compiletime_lessthan(bounds, length) ( \
+ __builtin_constant_p((bounds) < (length)) && \
+ (bounds) < (length) \
+)
+/**
+ * strncpy - Copy a string to memory with non-guaranteed NUL padding
+ *
+ * @p: pointer to destination of copy
+ * @q: pointer to NUL-terminated source string to copy
+ * @size: bytes to write at @p
+ *
+ * If strlen(@q) >= @size, the copy of @q will stop after @size bytes,
+ * and @p will NOT be NUL-terminated.
+ *
+ * If strlen(@q) < @size, following the copy of @q, trailing NUL bytes
+ * will be written to @p until @size total bytes have been written.
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * over-reads of @q, it cannot defend against writing unterminated
+ * results to @p. Using strncpy() remains ambiguous and fragile.
+ * Instead, please choose an alternative, so that the expectation
+ * of @p's contents is unambiguous:
+ *
+ * +--------------------+--------------------+------------+
+ * | **p** needs to be: | padded to **size** | not padded |
+ * +====================+====================+============+
+ * | NUL-terminated | strscpy_pad() | strscpy() |
+ * +--------------------+--------------------+------------+
+ * | not NUL-terminated | strtomem_pad() | strtomem() |
+ * +--------------------+--------------------+------------+
+ *
+ * Note strscpy*()'s differing return values for detecting truncation,
+ * and strtomem*()'s expectation that the destination is marked with
+ * __nonstring when it is a character array.
+ *
+ */
__FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3)
char *strncpy(char * const POS p, const char *q, __kernel_size_t size)
{
- size_t p_size = __builtin_object_size(p, 1);
+ size_t p_size = __member_size(p);
- if (__builtin_constant_p(size) && p_size < size)
+ if (__compiletime_lessthan(p_size, size))
__write_overflow();
if (p_size < size)
fortify_panic(__func__);
return __underlying_strncpy(p, q, size);
}
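A hedged illustration of the replacement table above (the struct and field names are made up): when the destination must stay NUL-terminated and padding is not required, strscpy() is the recommended substitute for strncpy().

#include <linux/string.h>
#include <linux/types.h>

struct example_label {
	char name[16];
};

static int example_set_name(struct example_label *lbl, const char *src)
{
	/* Returns the copied length, or -E2BIG when src had to be truncated. */
	ssize_t len = strscpy(lbl->name, src, sizeof(lbl->name));

	return (len < 0) ? (int)len : 0;
}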
+/**
+ * strcat - Append a string to an existing string
+ *
+ * @p: pointer to NUL-terminated string to append to
+ * @q: pointer to NUL-terminated source string to append from
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * read and write overflows, this is only possible when the
+ * destination buffer size is known to the compiler. Prefer
+ * building the string with formatting, via scnprintf() or similar.
+ * At the very least, use strncat().
+ *
+ * Returns @p.
+ *
+ */
__FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2)
char *strcat(char * const POS p, const char *q)
{
- size_t p_size = __builtin_object_size(p, 1);
+ size_t p_size = __member_size(p);
- if (p_size == (size_t)-1)
+ if (p_size == SIZE_MAX)
return __underlying_strcat(p, q);
if (strlcat(p, q, p_size) >= p_size)
fortify_panic(__func__);
@@ -102,14 +172,24 @@ char *strcat(char * const POS p, const char *q)
}
extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+/**
+ * strnlen - Return bounded count of characters in a NUL-terminated string
+ *
+ * @p: pointer to NUL-terminated string to count.
+ * @maxlen: maximum number of characters to count.
+ *
+ * Returns number of characters in @p (NOT including the final NUL), or
+ * @maxlen, if no NUL has been found up to there.
+ *
+ */
__FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen)
{
- size_t p_size = __builtin_object_size(p, 1);
+ size_t p_size = __member_size(p);
size_t p_len = __compiletime_strlen(p);
size_t ret;
/* We can take compile-time actions when maxlen is const. */
- if (__builtin_constant_p(maxlen) && p_len != (size_t)-1) {
+ if (__builtin_constant_p(maxlen) && p_len != SIZE_MAX) {
/* If p is const, we can use its compile-time-known len. */
if (maxlen >= p_size)
return p_len;
@@ -127,6 +207,19 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size
* possible for strlen() to be used on compile-time strings for use in
* static initializers (i.e. as a constant expression).
*/
+/**
+ * strlen - Return count of characters in a NUL-terminated string
+ *
+ * @p: pointer to NUL-terminated string to count.
+ *
+ * Do not use this function unless the string length is known at
+ * compile-time. When @p is unterminated, this function may crash
+ * or return unexpected counts that could lead to memory content
+ * exposures. Prefer strnlen().
+ *
+ * Returns number of characters in @p (NOT including the final NUL).
+ *
+ */
#define strlen(p) \
__builtin_choose_expr(__is_constexpr(__builtin_strlen(p)), \
__builtin_strlen(p), __fortify_strlen(p))
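A short sketch (names are illustrative) of the bounded alternative recommended above for data whose termination is not guaranteed:

#include <linux/string.h>

static bool example_name_fits(const char *untrusted, size_t bufsize)
{
	/* strnlen() never reads past bufsize, even if the NUL is missing. */
	return strnlen(untrusted, bufsize) < bufsize;
}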
@@ -134,10 +227,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1)
__kernel_size_t __fortify_strlen(const char * const POS p)
{
__kernel_size_t ret;
- size_t p_size = __builtin_object_size(p, 1);
+ size_t p_size = __member_size(p);
/* Give up if we don't know how large p is. */
- if (p_size == (size_t)-1)
+ if (p_size == SIZE_MAX)
return __underlying_strlen(p);
ret = strnlen(p, p_size);
if (p_size <= ret)
@@ -145,16 +238,34 @@ __kernel_size_t __fortify_strlen(const char * const POS p)
return ret;
}
-/* defined after fortified strlen to reuse it */
+/* Defined after fortified strlen() to reuse it. */
extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+/**
+ * strlcpy - Copy a string into another string buffer
+ *
+ * @p: pointer to destination of copy
+ * @q: pointer to NUL-terminated source string to copy
+ * @size: maximum number of bytes to write at @p
+ *
+ * If strlen(@q) >= @size, the copy of @q will be truncated at
+ * @size - 1 bytes. @p will always be NUL-terminated.
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * over-reads when calculating strlen(@q), it is still possible.
+ * Prefer strscpy(), though note its different return values for
+ * detecting truncation.
+ *
+ * Returns total number of bytes written to @p, including terminating NUL.
+ *
+ */
__FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size)
{
- size_t p_size = __builtin_object_size(p, 1);
- size_t q_size = __builtin_object_size(q, 1);
+ size_t p_size = __member_size(p);
+ size_t q_size = __member_size(q);
size_t q_len; /* Full count of source string length. */
size_t len; /* Count of characters going into destination. */
- if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ if (p_size == SIZE_MAX && q_size == SIZE_MAX)
return __real_strlcpy(p, q, size);
q_len = strlen(q);
len = (q_len >= size) ? size - 1 : q_len;
@@ -172,26 +283,60 @@ __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, si
return q_len;
}
-/* defined after fortified strnlen to reuse it */
+/* Defined after fortified strnlen() to reuse it. */
extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
+/**
+ * strscpy - Copy a C-string into a sized buffer
+ *
+ * @p: Where to copy the string to
+ * @q: Where to copy the string from
+ * @size: Size of destination buffer
+ *
+ * Copy the source string @q, or as much of it as fits, into the destination
+ * @p buffer. The behavior is undefined if the string buffers overlap. The
+ * destination @p buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * Preferred to strlcpy() since the API doesn't require reading memory
+ * from the source @q string beyond the specified @size bytes, and since
+ * the return value is easier to error-check than strlcpy()'s.
+ * In addition, the implementation is robust to the string changing out
+ * from underneath it, unlike the current strlcpy() implementation.
+ *
+ * Preferred to strncpy() since it always returns a valid string, and
+ * doesn't unnecessarily force the tail of the destination buffer to be
+ * zero padded. If padding is desired please use strscpy_pad().
+ *
+ * Returns the number of characters copied in @p (not including the
+ * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated.
+ */
__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size)
{
size_t len;
/* Use string size rather than possible enclosing struct size. */
- size_t p_size = __builtin_object_size(p, 1);
- size_t q_size = __builtin_object_size(q, 1);
+ size_t p_size = __member_size(p);
+ size_t q_size = __member_size(q);
/* If we cannot get size of p and q default to call strscpy. */
- if (p_size == (size_t) -1 && q_size == (size_t) -1)
+ if (p_size == SIZE_MAX && q_size == SIZE_MAX)
return __real_strscpy(p, q, size);
/*
* If size can be known at compile time and is greater than
* p_size, generate a compile time write overflow error.
*/
- if (__builtin_constant_p(size) && size > p_size)
+ if (__compiletime_lessthan(p_size, size))
__write_overflow();
+ /* Short-circuit for compile-time known-safe lengths. */
+ if (__compiletime_lessthan(p_size, SIZE_MAX)) {
+ len = __compiletime_strlen(q);
+
+ if (len < SIZE_MAX && __compiletime_lessthan(len, size)) {
+ __underlying_memcpy(p, q, len + 1);
+ return len;
+ }
+ }
+
/*
* This call protects from read overflow, because len will default to q
 * length if it is smaller than size.
@@ -219,15 +364,34 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s
return __real_strscpy(p, q, len);
}
-/* defined after fortified strlen and strnlen to reuse them */
+/**
+ * strncat - Append a string to an existing string
+ *
+ * @p: pointer to NUL-terminated string to append to
+ * @q: pointer to source string to append from
+ * @count: Maximum bytes to read from @q
+ *
+ * Appends at most @count bytes from @q (stopping at the first
+ * NUL byte) after the NUL-terminated string at @p. @p will be
+ * NUL-terminated.
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * read and write overflows, this is only possible when the sizes
+ * of @p and @q are known to the compiler. Prefer building the
+ * string with formatting, via scnprintf() or similar.
+ *
+ * Returns @p.
+ *
+ */
+/* Defined after fortified strlen() and strnlen() to reuse them. */
__FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3)
char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count)
{
size_t p_len, copy_len;
- size_t p_size = __builtin_object_size(p, 1);
- size_t q_size = __builtin_object_size(q, 1);
+ size_t p_size = __member_size(p);
+ size_t q_size = __member_size(q);
- if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ if (p_size == SIZE_MAX && q_size == SIZE_MAX)
return __underlying_strncat(p, q, count);
p_len = strlen(p);
copy_len = strnlen(q, count);
@@ -246,15 +410,16 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
/*
* Length argument is a constant expression, so we
* can perform compile-time bounds checking where
- * buffer sizes are known.
+ * buffer sizes are also known at compile time.
*/
/* Error when size is larger than enclosing struct. */
- if (p_size > p_size_field && p_size < size)
+ if (__compiletime_lessthan(p_size_field, p_size) &&
+ __compiletime_lessthan(p_size, size))
__write_overflow();
/* Warn when write size is larger than dest field. */
- if (p_size_field < size)
+ if (__compiletime_lessthan(p_size_field, size))
__write_overflow_field(p_size_field, size);
}
/*
@@ -268,10 +433,10 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
/*
* Always stop accesses beyond the struct that contains the
* field, when the buffer's remaining size is known.
- * (The -1 test is to optimize away checks where the buffer
+ * (The SIZE_MAX test is to optimize away checks where the buffer
* lengths are unknown.)
*/
- if (p_size != (size_t)(-1) && p_size < size)
+ if (p_size != SIZE_MAX && p_size < size)
fortify_panic("memset");
}
@@ -282,11 +447,13 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
})
/*
- * __builtin_object_size() must be captured here to avoid evaluating argument
- * side-effects further into the macro layers.
+ * __struct_size() vs __member_size() must be captured here to avoid
+ * evaluating argument side-effects further into the macro layers.
*/
+#ifndef CONFIG_KMSAN
#define memset(p, c, s) __fortify_memset_chk(p, c, s, \
- __builtin_object_size(p, 0), __builtin_object_size(p, 1))
+ __struct_size(p), __member_size(p))
+#endif
/*
* To make sure the compiler can enforce protection against buffer overflows,
@@ -319,7 +486,7 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
* V = vulnerable to run-time overflow (will need refactoring to solve)
*
*/
-__FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size,
+__FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
const size_t p_size,
const size_t q_size,
const size_t p_size_field,
@@ -330,25 +497,28 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size,
/*
* Length argument is a constant expression, so we
* can perform compile-time bounds checking where
- * buffer sizes are known.
+ * buffer sizes are also known at compile time.
*/
/* Error when size is larger than enclosing struct. */
- if (p_size > p_size_field && p_size < size)
+ if (__compiletime_lessthan(p_size_field, p_size) &&
+ __compiletime_lessthan(p_size, size))
__write_overflow();
- if (q_size > q_size_field && q_size < size)
+ if (__compiletime_lessthan(q_size_field, q_size) &&
+ __compiletime_lessthan(q_size, size))
__read_overflow2();
/* Warn when write size argument larger than dest field. */
- if (p_size_field < size)
+ if (__compiletime_lessthan(p_size_field, size))
__write_overflow_field(p_size_field, size);
/*
* Warn for source field over-read when building with W=1
* or when an over-write happened, so both can be fixed at
* the same time.
*/
- if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || p_size_field < size) &&
- q_size_field < size)
+ if ((IS_ENABLED(KBUILD_EXTRA_WARN1) ||
+ __compiletime_lessthan(p_size_field, size)) &&
+ __compiletime_lessthan(q_size_field, size))
__read_overflow2_field(q_size_field, size);
}
/*
@@ -362,41 +532,109 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size,
/*
* Always stop accesses beyond the struct that contains the
* field, when the buffer's remaining size is known.
- * (The -1 test is to optimize away checks where the buffer
+ * (The SIZE_MAX test is to optimize away checks where the buffer
* lengths are unknown.)
*/
- if ((p_size != (size_t)(-1) && p_size < size) ||
- (q_size != (size_t)(-1) && q_size < size))
+ if ((p_size != SIZE_MAX && p_size < size) ||
+ (q_size != SIZE_MAX && q_size < size))
fortify_panic(func);
+
+ /*
+ * Warn when writing beyond destination field size.
+ *
+ * We must ignore p_size_field == 0 for existing 0-element
+ * fake flexible arrays, until they are all converted to
+ * proper flexible arrays.
+ *
+ * The implementation of __builtin_*object_size() behaves
+ * like sizeof() when not directly referencing a flexible
+ * array member, which means there will be many bounds checks
+ * that will appear at run-time, without a way for them to be
+ * detected at compile-time (as can be done when the destination
+ * is specifically the flexible array member).
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832
+ */
+ if (p_size_field != 0 && p_size_field != SIZE_MAX &&
+ p_size != p_size_field && p_size_field < size)
+ return true;
+
+ return false;
}
#define __fortify_memcpy_chk(p, q, size, p_size, q_size, \
p_size_field, q_size_field, op) ({ \
- size_t __fortify_size = (size_t)(size); \
- fortify_memcpy_chk(__fortify_size, p_size, q_size, \
- p_size_field, q_size_field, #op); \
+ const size_t __fortify_size = (size_t)(size); \
+ const size_t __p_size = (p_size); \
+ const size_t __q_size = (q_size); \
+ const size_t __p_size_field = (p_size_field); \
+ const size_t __q_size_field = (q_size_field); \
+ WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \
+ __q_size, __p_size_field, \
+ __q_size_field, #op), \
+ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \
+ __fortify_size, \
+ "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \
+ __p_size_field); \
__underlying_##op(p, q, __fortify_size); \
})
/*
- * __builtin_object_size() must be captured here to avoid evaluating argument
- * side-effects further into the macro layers.
+ * Notes about compile-time buffer size detection:
+ *
+ * With these types...
+ *
+ * struct middle {
+ * u16 a;
+ * u8 middle_buf[16];
+ * int b;
+ * };
+ * struct end {
+ * u16 a;
+ * u8 end_buf[16];
+ * };
+ * struct flex {
+ * int a;
+ * u8 flex_buf[];
+ * };
+ *
+ * void func(TYPE *ptr) { ... }
+ *
+ * Cases where destination size cannot be currently detected:
+ * - the size of ptr's object (seemingly by design, gcc & clang fail):
+ * __builtin_object_size(ptr, 1) == SIZE_MAX
+ * - the size of flexible arrays in ptr's obj (by design, dynamic size):
+ * __builtin_object_size(ptr->flex_buf, 1) == SIZE_MAX
+ * - the size of ANY array at the end of ptr's obj (gcc and clang bug):
+ * __builtin_object_size(ptr->end_buf, 1) == SIZE_MAX
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101836
+ *
+ * Cases where destination size is currently detected:
+ * - the size of non-array members within ptr's object:
+ * __builtin_object_size(ptr->a, 1) == 2
+ * - the size of non-flexible-array in the middle of ptr's obj:
+ * __builtin_object_size(ptr->middle_buf, 1) == 16
+ *
+ */
+
+/*
+ * __struct_size() vs __member_size() must be captured here to avoid
+ * evaluating argument side-effects further into the macro layers.
*/
#define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \
- __builtin_object_size(p, 0), __builtin_object_size(q, 0), \
- __builtin_object_size(p, 1), __builtin_object_size(q, 1), \
+ __struct_size(p), __struct_size(q), \
+ __member_size(p), __member_size(q), \
memcpy)
#define memmove(p, q, s) __fortify_memcpy_chk(p, q, s, \
- __builtin_object_size(p, 0), __builtin_object_size(q, 0), \
- __builtin_object_size(p, 1), __builtin_object_size(q, 1), \
+ __struct_size(p), __struct_size(q), \
+ __member_size(p), __member_size(q), \
memmove)
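A hedged example of the detectable case listed above (the type is invented for illustration): a copy into a fixed-size member in the middle of a struct gets a compile-time bound, while non-constant lengths still fall back to the run-time check.

#include <linux/string.h>
#include <linux/types.h>

struct example_middle {
	u16 a;
	u8 middle_buf[16];
	int b;
};

static void example_fill(struct example_middle *m, const void *src, size_t len)
{
	/*
	 * __member_size(m->middle_buf) == 16 here, so a constant len larger
	 * than 16 triggers a compile-time warning via __write_overflow_field();
	 * non-constant lengths are still caught by fortify_memcpy_chk() at
	 * run time.
	 */
	memcpy(m->middle_buf, src, len);
}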
extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
__FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size)
{
- size_t p_size = __builtin_object_size(p, 0);
+ size_t p_size = __struct_size(p);
- if (__builtin_constant_p(size) && p_size < size)
+ if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
fortify_panic(__func__);
@@ -406,13 +644,13 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size)
__FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3)
int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t size)
{
- size_t p_size = __builtin_object_size(p, 0);
- size_t q_size = __builtin_object_size(q, 0);
+ size_t p_size = __struct_size(p);
+ size_t q_size = __struct_size(q);
if (__builtin_constant_p(size)) {
- if (p_size < size)
+ if (__compiletime_lessthan(p_size, size))
__read_overflow();
- if (q_size < size)
+ if (__compiletime_lessthan(q_size, size))
__read_overflow2();
}
if (p_size < size || q_size < size)
@@ -423,9 +661,9 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t
__FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3)
void *memchr(const void * const POS0 p, int c, __kernel_size_t size)
{
- size_t p_size = __builtin_object_size(p, 0);
+ size_t p_size = __struct_size(p);
- if (__builtin_constant_p(size) && p_size < size)
+ if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
fortify_panic(__func__);
@@ -435,41 +673,58 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size)
void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
__FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size)
{
- size_t p_size = __builtin_object_size(p, 0);
+ size_t p_size = __struct_size(p);
- if (__builtin_constant_p(size) && p_size < size)
+ if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
fortify_panic(__func__);
return __real_memchr_inv(p, c, size);
}
-extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup)
+ __realloc_size(2);
__FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp)
{
- size_t p_size = __builtin_object_size(p, 0);
+ size_t p_size = __struct_size(p);
- if (__builtin_constant_p(size) && p_size < size)
+ if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
fortify_panic(__func__);
return __real_kmemdup(p, size, gfp);
}
+/**
+ * strcpy - Copy a string into another string buffer
+ *
+ * @p: pointer to destination of copy
+ * @q: pointer to NUL-terminated source string to copy
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * overflows, this is only possible when the sizes of @q and @p are
+ * known to the compiler. Prefer strscpy(), though note its different
+ * return values for detecting truncation.
+ *
+ * Returns @p.
+ *
+ */
/* Defined after fortified strlen to reuse it. */
__FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2)
char *strcpy(char * const POS p, const char * const POS q)
{
- size_t p_size = __builtin_object_size(p, 1);
- size_t q_size = __builtin_object_size(q, 1);
+ size_t p_size = __member_size(p);
+ size_t q_size = __member_size(q);
size_t size;
/* If neither buffer size is known, immediately give up. */
- if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ if (__builtin_constant_p(p_size) &&
+ __builtin_constant_p(q_size) &&
+ p_size == SIZE_MAX && q_size == SIZE_MAX)
return __underlying_strcpy(p, q);
size = strlen(q) + 1;
/* Compile-time check for const size overflow. */
- if (__builtin_constant_p(size) && p_size < size)
+ if (__compiletime_lessthan(p_size, size))
__write_overflow();
/* Run-time check for dynamic size overflow. */
if (p_size < size)
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 0621c5f86c39..b303472255be 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -8,9 +8,11 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/atomic.h>
+#include <linux/jump_label.h>
#ifdef CONFIG_FREEZER
-extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */
+DECLARE_STATIC_KEY_FALSE(freezer_active);
+
extern bool pm_freezing; /* PM freezing in effect */
extern bool pm_nosig_freezing; /* PM nosig freezing in effect */
@@ -22,10 +24,7 @@ extern unsigned int freeze_timeout_msecs;
/*
* Check if a process has been frozen
*/
-static inline bool frozen(struct task_struct *p)
-{
- return p->flags & PF_FROZEN;
-}
+extern bool frozen(struct task_struct *p);
extern bool freezing_slow_path(struct task_struct *p);
@@ -34,9 +33,10 @@ extern bool freezing_slow_path(struct task_struct *p);
*/
static inline bool freezing(struct task_struct *p)
{
- if (likely(!atomic_read(&system_freezing_cnt)))
- return false;
- return freezing_slow_path(p);
+ if (static_branch_unlikely(&freezer_active))
+ return freezing_slow_path(p);
+
+ return false;
}
/* Takes and releases task alloc lock using task_lock() */
@@ -48,23 +48,14 @@ extern int freeze_kernel_threads(void);
extern void thaw_processes(void);
extern void thaw_kernel_threads(void);
-/*
- * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION
- * If try_to_freeze causes a lockdep warning it means the caller may deadlock
- */
-static inline bool try_to_freeze_unsafe(void)
+static inline bool try_to_freeze(void)
{
might_sleep();
if (likely(!freezing(current)))
return false;
- return __refrigerator(false);
-}
-
-static inline bool try_to_freeze(void)
-{
if (!(current->flags & PF_NOFREEZE))
debug_check_no_locks_held();
- return try_to_freeze_unsafe();
+ return __refrigerator(false);
}
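A minimal sketch (the thread body is illustrative) of the usual caller pattern these helpers serve; the static-key rework above does not change how callers use them:

#include <linux/freezer.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int example_thread(void *data)
{
	set_freezable();	/* opt this kthread in to freezing */

	while (!kthread_should_stop()) {
		try_to_freeze();	/* park here while the system suspends */
		/* ... do one unit of work ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}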
extern bool freeze_task(struct task_struct *p);
@@ -79,195 +70,6 @@ static inline bool cgroup_freezing(struct task_struct *task)
}
#endif /* !CONFIG_CGROUP_FREEZER */
-/*
- * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
- * calls wait_for_completion(&vfork) and reset right after it returns from this
- * function. Next, the parent should call try_to_freeze() to freeze itself
- * appropriately in case the child has exited before the freezing of tasks is
- * complete. However, we don't want kernel threads to be frozen in unexpected
- * places, so we allow them to block freeze_processes() instead or to set
- * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the
- * parent won't really block freeze_processes(), since ____call_usermodehelper()
- * (the child) does a little before exec/exit and it can't be frozen before
- * waking up the parent.
- */
-
-
-/**
- * freezer_do_not_count - tell freezer to ignore %current
- *
- * Tell freezers to ignore the current task when determining whether the
- * target frozen state is reached. IOW, the current task will be
- * considered frozen enough by freezers.
- *
- * The caller shouldn't do anything which isn't allowed for a frozen task
- * until freezer_cont() is called. Usually, freezer[_do_not]_count() pair
- * wrap a scheduling operation and nothing much else.
- */
-static inline void freezer_do_not_count(void)
-{
- current->flags |= PF_FREEZER_SKIP;
-}
-
-/**
- * freezer_count - tell freezer to stop ignoring %current
- *
- * Undo freezer_do_not_count(). It tells freezers that %current should be
- * considered again and tries to freeze if freezing condition is already in
- * effect.
- */
-static inline void freezer_count(void)
-{
- current->flags &= ~PF_FREEZER_SKIP;
- /*
- * If freezing is in progress, the following paired with smp_mb()
- * in freezer_should_skip() ensures that either we see %true
- * freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP.
- */
- smp_mb();
- try_to_freeze();
-}
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline void freezer_count_unsafe(void)
-{
- current->flags &= ~PF_FREEZER_SKIP;
- smp_mb();
- try_to_freeze_unsafe();
-}
-
-/**
- * freezer_should_skip - whether to skip a task when determining frozen
- * state is reached
- * @p: task in quesion
- *
- * This function is used by freezers after establishing %true freezing() to
- * test whether a task should be skipped when determining the target frozen
- * state is reached. IOW, if this function returns %true, @p is considered
- * frozen enough.
- */
-static inline bool freezer_should_skip(struct task_struct *p)
-{
- /*
- * The following smp_mb() paired with the one in freezer_count()
- * ensures that either freezer_count() sees %true freezing() or we
- * see cleared %PF_FREEZER_SKIP and return %false. This makes it
- * impossible for a task to slip frozen state testing after
- * clearing %PF_FREEZER_SKIP.
- */
- smp_mb();
- return p->flags & PF_FREEZER_SKIP;
-}
-
-/*
- * These functions are intended to be used whenever you want allow a sleeping
- * task to be frozen. Note that neither return any clear indication of
- * whether a freeze event happened while in this function.
- */
-
-/* Like schedule(), but should not block the freezer. */
-static inline void freezable_schedule(void)
-{
- freezer_do_not_count();
- schedule();
- freezer_count();
-}
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline void freezable_schedule_unsafe(void)
-{
- freezer_do_not_count();
- schedule();
- freezer_count_unsafe();
-}
-
-/*
- * Like schedule_timeout(), but should not block the freezer. Do not
- * call this with locks held.
- */
-static inline long freezable_schedule_timeout(long timeout)
-{
- long __retval;
- freezer_do_not_count();
- __retval = schedule_timeout(timeout);
- freezer_count();
- return __retval;
-}
-
-/*
- * Like schedule_timeout_interruptible(), but should not block the freezer. Do not
- * call this with locks held.
- */
-static inline long freezable_schedule_timeout_interruptible(long timeout)
-{
- long __retval;
- freezer_do_not_count();
- __retval = schedule_timeout_interruptible(timeout);
- freezer_count();
- return __retval;
-}
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout)
-{
- long __retval;
-
- freezer_do_not_count();
- __retval = schedule_timeout_interruptible(timeout);
- freezer_count_unsafe();
- return __retval;
-}
-
-/* Like schedule_timeout_killable(), but should not block the freezer. */
-static inline long freezable_schedule_timeout_killable(long timeout)
-{
- long __retval;
- freezer_do_not_count();
- __retval = schedule_timeout_killable(timeout);
- freezer_count();
- return __retval;
-}
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline long freezable_schedule_timeout_killable_unsafe(long timeout)
-{
- long __retval;
- freezer_do_not_count();
- __retval = schedule_timeout_killable(timeout);
- freezer_count_unsafe();
- return __retval;
-}
-
-/*
- * Like schedule_hrtimeout_range(), but should not block the freezer. Do not
- * call this with locks held.
- */
-static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
- u64 delta, const enum hrtimer_mode mode)
-{
- int __retval;
- freezer_do_not_count();
- __retval = schedule_hrtimeout_range(expires, delta, mode);
- freezer_count();
- return __retval;
-}
-
-/*
- * Freezer-friendly wrappers around wait_event_interruptible(),
- * wait_event_killable() and wait_event_interruptible_timeout(), originally
- * defined in <linux/wait.h>
- */
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-#define wait_event_freezekillable_unsafe(wq, condition) \
-({ \
- int __retval; \
- freezer_do_not_count(); \
- __retval = wait_event_killable(wq, (condition)); \
- freezer_count_unsafe(); \
- __retval; \
-})
-
#else /* !CONFIG_FREEZER */
static inline bool frozen(struct task_struct *p) { return false; }
static inline bool freezing(struct task_struct *p) { return false; }
@@ -281,35 +83,8 @@ static inline void thaw_kernel_threads(void) {}
static inline bool try_to_freeze(void) { return false; }
-static inline void freezer_do_not_count(void) {}
-static inline void freezer_count(void) {}
-static inline int freezer_should_skip(struct task_struct *p) { return 0; }
static inline void set_freezable(void) {}
-#define freezable_schedule() schedule()
-
-#define freezable_schedule_unsafe() schedule()
-
-#define freezable_schedule_timeout(timeout) schedule_timeout(timeout)
-
-#define freezable_schedule_timeout_interruptible(timeout) \
- schedule_timeout_interruptible(timeout)
-
-#define freezable_schedule_timeout_interruptible_unsafe(timeout) \
- schedule_timeout_interruptible(timeout)
-
-#define freezable_schedule_timeout_killable(timeout) \
- schedule_timeout_killable(timeout)
-
-#define freezable_schedule_timeout_killable_unsafe(timeout) \
- schedule_timeout_killable(timeout)
-
-#define freezable_schedule_hrtimeout_range(expires, delta, mode) \
- schedule_hrtimeout_range(expires, delta, mode)
-
-#define wait_event_freezekillable_unsafe(wq, condition) \
- wait_event_killable(wq, condition)
-
#endif /* !CONFIG_FREEZER */
#endif /* FREEZER_H_INCLUDED */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9eced4cc286e..066555ad1bf8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -560,8 +560,8 @@ struct posix_acl;
#define ACL_NOT_CACHED ((void *)(-1))
/*
* ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to
- * cache the ACL. This also means that ->get_acl() can be called in RCU mode
- * with the LOOKUP_RCU flag.
+ * cache the ACL. This also means that ->get_inode_acl() can be called in RCU
+ * mode with the LOOKUP_RCU flag.
*/
#define ACL_DONT_CACHE ((void *)(-3))
@@ -1131,9 +1131,8 @@ struct file_lock_context {
/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
-#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
-#define OFFSET_MAX INT_LIMIT(loff_t)
-#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
+#define OFFSET_MAX type_max(loff_t)
+#define OFFT_OFFSET_MAX type_max(off_t)
#endif
extern void send_sigio(struct fown_struct *fown, int fd, int band);
@@ -1170,6 +1169,7 @@ extern int locks_delete_block(struct file_lock *);
extern int vfs_test_lock(struct file *, struct file_lock *);
extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
+bool vfs_inode_has_locks(struct inode *inode);
extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
extern void lease_get_mtime(struct inode *, struct timespec64 *time);
@@ -1186,6 +1186,13 @@ extern void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files);
extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
fl_owner_t owner);
+
+static inline struct file_lock_context *
+locks_inode_context(const struct inode *inode)
+{
+ return smp_load_acquire(&inode->i_flctx);
+}
+
#else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, unsigned int cmd,
struct flock __user *user)
@@ -1284,6 +1291,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
return 0;
}
+static inline bool vfs_inode_has_locks(struct inode *inode)
+{
+ return false;
+}
+
static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
return -ENOLCK;
@@ -1326,6 +1338,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
{
return false;
}
+
+static inline struct file_lock_context *
+locks_inode_context(const struct inode *inode)
+{
+ return NULL;
+}
+
#endif /* !CONFIG_FILE_LOCKING */
static inline struct inode *file_inode(const struct file *f)
@@ -1472,7 +1491,7 @@ struct super_block {
const struct xattr_handler **s_xattr;
#ifdef CONFIG_FS_ENCRYPTION
const struct fscrypt_operations *s_cop;
- struct key *s_master_keys; /* master crypto keys in use */
+ struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */
#endif
#ifdef CONFIG_FS_VERITY
const struct fsverity_operations *s_vop;
@@ -1613,23 +1632,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
}
/**
- * i_uid_into_mnt - map an inode's i_uid down into a mnt_userns
- * @mnt_userns: user namespace of the mount the inode was found from
- * @inode: inode to map
- *
- * Note, this will eventually be removed completely in favor of the type-safe
- * i_uid_into_vfsuid().
- *
- * Return: the inode's i_uid mapped down according to @mnt_userns.
- * If the inode's i_uid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
- const struct inode *inode)
-{
- return AS_KUIDT(make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid));
-}
-
-/**
* i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns
* @mnt_userns: user namespace of the mount the inode was found from
* @inode: inode to map
@@ -1682,23 +1684,6 @@ static inline void i_uid_update(struct user_namespace *mnt_userns,
}
/**
- * i_gid_into_mnt - map an inode's i_gid down into a mnt_userns
- * @mnt_userns: user namespace of the mount the inode was found from
- * @inode: inode to map
- *
- * Note, this will eventually be removed completely in favor of the type-safe
- * i_gid_into_vfsgid().
- *
- * Return: the inode's i_gid mapped down according to @mnt_userns.
- * If the inode's i_gid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
- const struct inode *inode)
-{
- return AS_KGIDT(make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid));
-}
-
-/**
* i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns
* @mnt_userns: user namespace of the mount the inode was found from
* @inode: inode to map
@@ -2004,8 +1989,9 @@ static inline int vfs_whiteout(struct user_namespace *mnt_userns,
WHITEOUT_DEV);
}
-struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
- struct dentry *dentry, umode_t mode, int open_flag);
+struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns,
+ const struct path *parentpath,
+ umode_t mode, int open_flag, const struct cred *cred);
int vfs_mkobj(struct dentry *, umode_t,
int (*f)(struct dentry *, umode_t, void *),
@@ -2038,9 +2024,10 @@ umode_t mode_strip_sgid(struct user_namespace *mnt_userns,
* the kernel specify what kind of dirent layout it wants to have.
* This allows the kernel to read directories into kernel space or
* to have different dirent layouts depending on the binary type.
+ * Return 'true' to keep going and 'false' if there are no more entries.
*/
struct dir_context;
-typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
+typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
unsigned);
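A hedged sketch of an actor under the new convention (the context struct is invented): returning true asks iterate_dir() for more entries, returning false stops the walk early.

#include <linux/container_of.h>
#include <linux/fs.h>

struct example_ctx {
	struct dir_context ctx;
	unsigned int seen;
	unsigned int max;
};

static bool example_filldir(struct dir_context *ctx, const char *name,
			    int namelen, loff_t offset, u64 ino,
			    unsigned int d_type)
{
	struct example_ctx *ec = container_of(ctx, struct example_ctx, ctx);

	ec->seen++;
	return ec->seen < ec->max;	/* false == no more entries wanted */
}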
struct dir_context {
@@ -2087,6 +2074,14 @@ struct dir_context {
*/
#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
+/*
+ * These flags control the behavior of vfs_copy_file_range().
+ * They are not available to the user via syscall.
+ *
+ * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
+ */
+#define COPY_FILE_SPLICE (1 << 0)
+
struct iov_iter;
struct io_uring_cmd;
@@ -2132,13 +2127,15 @@ struct file_operations {
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
+ int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
+ unsigned int poll_flags);
} __randomize_layout;
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
int (*permission) (struct user_namespace *, struct inode *, int);
- struct posix_acl * (*get_acl)(struct inode *, int, bool);
+ struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);
int (*readlink) (struct dentry *, char __user *,int);
@@ -2167,8 +2164,10 @@ struct inode_operations {
struct file *, unsigned open_flag,
umode_t create_mode);
int (*tmpfile) (struct user_namespace *, struct inode *,
- struct dentry *, umode_t);
- int (*set_acl)(struct user_namespace *, struct inode *,
+ struct file *, umode_t);
+ struct posix_acl *(*get_acl)(struct user_namespace *, struct dentry *,
+ int);
+ int (*set_acl)(struct user_namespace *, struct dentry *,
struct posix_acl *, int);
int (*fileattr_set)(struct user_namespace *mnt_userns,
struct dentry *dentry, struct fileattr *fa);
@@ -2371,13 +2370,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
* don't have to write inode on fdatasync() when only
* e.g. the timestamps have changed.
* I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
- * I_DIRTY_TIME The inode itself only has dirty timestamps, and the
+ * I_DIRTY_TIME The inode itself has dirty timestamps, and the
* lazytime mount option is enabled. We keep track of this
* separately from I_DIRTY_SYNC in order to implement
* lazytime. This gets cleared if I_DIRTY_INODE
- * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e.
- * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
- * i_state, but not both. I_DIRTY_PAGES may still be set.
+ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
+ * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
+ * in place because writeback might already be in progress
+ * and we don't want to lose the time update.
* I_NEW Serves as both a mutex and completion notification.
* New inodes set I_NEW. If two processes both create
* the same inode, one of them will release its inode and
@@ -2727,18 +2727,22 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file)
return mnt_user_ns(file->f_path.mnt);
}
+static inline struct mnt_idmap *file_mnt_idmap(struct file *file)
+{
+ return mnt_idmap(file->f_path.mnt);
+}
+
/**
* is_idmapped_mnt - check whether a mount is mapped
* @mnt: the mount to check
*
- * If @mnt has an idmapping attached different from the
- * filesystem's idmapping then @mnt is mapped.
+ * If @mnt has an idmapping other than @nop_mnt_idmap attached to it then
+ * @mnt is mapped.
*
* Return: true if mount is mapped, false if not.
*/
static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
{
- return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
+ return mnt_idmap(mnt) != &nop_mnt_idmap;
}
extern long vfs_truncate(const struct path *, loff_t);
@@ -2779,6 +2783,15 @@ extern int finish_open(struct file *file, struct dentry *dentry,
int (*open)(struct inode *, struct file *));
extern int finish_no_open(struct file *file, struct dentry *dentry);
+/* Helper for the simple case when original dentry is used */
+static inline int finish_open_simple(struct file *file, int error)
+{
+ if (error)
+ return error;
+
+ return finish_open(file, file->f_path.dentry, NULL);
+}
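A hedged sketch of the intended call site (the filesystem hook and its inode helper are placeholders): a ->tmpfile() implementation creates the inode and then hands the already-prepared dentry back through this helper.

#include <linux/errno.h>
#include <linux/fs.h>

/* Placeholder for the filesystem's own inode-creation helper. */
static int example_new_inode(struct inode *dir, struct dentry *dentry,
			     umode_t mode)
{
	return -EOPNOTSUPP;	/* a real filesystem allocates the inode here */
}

static int example_tmpfile(struct user_namespace *mnt_userns,
			   struct inode *dir, struct file *file, umode_t mode)
{
	int error = example_new_inode(dir, file->f_path.dentry, mode);

	/* finish_open_simple() opens file->f_path.dentry unless error is set. */
	return finish_open_simple(file, error);
}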
+
/* fs/dcache.c */
extern void __init vfs_caches_init_early(void);
extern void __init vfs_caches_init(void);
@@ -3090,7 +3103,7 @@ extern void __destroy_inode(struct inode *);
extern struct inode *new_inode_pseudo(struct super_block *sb);
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
-extern int should_remove_suid(struct dentry *);
+extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *);
extern int file_remove_privs(struct file *);
/*
@@ -3471,7 +3484,7 @@ void simple_transaction_set(struct file *file, size_t n);
* All attributes contain a text representation of a numeric value
* that are accessed with the get() and set() functions.
*/
-#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
+#define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \
static int __fops ## _open(struct inode *inode, struct file *file) \
{ \
__simple_attr_check_format(__fmt, 0ull); \
@@ -3482,10 +3495,16 @@ static const struct file_operations __fops = { \
.open = __fops ## _open, \
.release = simple_attr_release, \
.read = simple_attr_read, \
- .write = simple_attr_write, \
+ .write = (__is_signed) ? simple_attr_write_signed : simple_attr_write, \
.llseek = generic_file_llseek, \
}
+#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
+ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false)
+
+#define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \
+ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true)
+
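A hedged example of the new signed variant (all names are invented): an attribute whose value may legitimately be negative now round-trips through the signed write parser.

#include <linux/fs.h>
#include <linux/types.h>

static s64 example_offset;

static int example_offset_get(void *data, u64 *val)
{
	*val = example_offset;
	return 0;
}

static int example_offset_set(void *data, u64 val)
{
	example_offset = val;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE_SIGNED(example_offset_fops, example_offset_get,
			       example_offset_set, "%lld\n");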
static inline __printf(1, 2)
void __simple_attr_check_format(const char *fmt, ...)
{
@@ -3500,6 +3519,8 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos);
ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos);
+ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos);
struct ctl_table;
int __init list_bdev_fs_names(char *buf, size_t size);
@@ -3513,7 +3534,7 @@ int __init list_bdev_fs_names(char *buf, size_t size);
static inline bool is_sxid(umode_t mode)
{
- return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
+ return mode & (S_ISUID | S_ISGID);
}
static inline int check_sticky(struct user_namespace *mnt_userns,
@@ -3540,17 +3561,17 @@ static inline bool dir_emit(struct dir_context *ctx,
const char *name, int namelen,
u64 ino, unsigned type)
{
- return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
+ return ctx->actor(ctx, name, namelen, ctx->pos, ino, type);
}
static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
{
return ctx->actor(ctx, ".", 1, ctx->pos,
- file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0;
+ file->f_path.dentry->d_inode->i_ino, DT_DIR);
}
static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
{
return ctx->actor(ctx, "..", 2, ctx->pos,
- parent_ino(file->f_path.dentry), DT_DIR) == 0;
+ parent_ino(file->f_path.dentry), DT_DIR);
}
static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
{
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 13fa6f3df8e4..5469ffee21c7 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -99,7 +99,7 @@ struct fs_context {
const struct cred *cred; /* The mounter's credentials */
struct p_log log; /* Logging buffer */
const char *source; /* The source name (eg. dev path) */
- void *security; /* Linux S&M options */
+ void *security; /* LSM options */
void *s_fs_info; /* Proposed s_fs_info */
unsigned int sb_flags; /* Proposed superblock flags (SB_*) */
unsigned int sb_flags_mask; /* Superblock flags that were changed */
@@ -145,20 +145,6 @@ extern void fc_drop_locked(struct fs_context *fc);
int reconfigure_single(struct super_block *s,
int flags, void *data);
-/*
- * sget() wrappers to be called from the ->get_tree() op.
- */
-enum vfs_get_super_keying {
- vfs_get_single_super, /* Only one such superblock may exist */
- vfs_get_single_reconf_super, /* As above, but reconfigure if it exists */
- vfs_get_keyed_super, /* Superblocks with different s_fs_info keys may exist */
- vfs_get_independent_super, /* Multiple independent superblocks may exist */
-};
-extern int vfs_get_super(struct fs_context *fc,
- enum vfs_get_super_keying keying,
- int (*fill_super)(struct super_block *sb,
- struct fs_context *fc));
-
extern int get_tree_nodev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index f103c91139d4..01542c4b87a2 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -76,6 +76,7 @@ static inline int fs_parse(struct fs_context *fc,
extern int fs_lookup_param(struct fs_context *fc,
struct fs_parameter *param,
bool want_bdev,
+ unsigned int flags,
struct path *_path);
extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found);
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 720874e6ee94..8e312c8323a8 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -75,7 +75,7 @@ struct fscache_volume {
atomic_t n_accesses; /* Number of cache accesses in progress */
unsigned int debug_id;
unsigned int key_hash; /* Hash of key string */
- char *key; /* Volume ID, eg. "afs@example.com@1234" */
+ u8 *key; /* Volume ID, eg. "afs@example.com@1234" */
struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */
struct hlist_bl_node hash_link; /* Link in hash table */
struct work_struct work;
@@ -258,7 +258,7 @@ struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume,
/**
* fscache_use_cookie - Request usage of cookie attached to an object
- * @object: Object description
+ * @cookie: The cookie representing the cache object
* @will_modify: If cache is expected to be modified locally
*
* Request usage of the cookie attached to an object. The caller should tell
@@ -274,7 +274,7 @@ static inline void fscache_use_cookie(struct fscache_cookie *cookie,
/**
* fscache_unuse_cookie - Cease usage of cookie attached to an object
- * @object: Object description
+ * @cookie: The cookie representing the cache object
* @aux_data: Updated auxiliary data (or NULL)
* @object_size: Revised size of the object (or NULL)
*
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 7d2f1e0f23b1..4f5f8a651213 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -161,24 +161,21 @@ struct fscrypt_operations {
int *ino_bits_ret, int *lblk_bits_ret);
/*
- * Return the number of block devices to which the filesystem may write
- * encrypted file contents.
+ * Return an array of pointers to the block devices to which the
+ * filesystem may write encrypted file contents, NULL if the filesystem
+ * only has a single such block device, or an ERR_PTR() on error.
+ *
+ * On successful non-NULL return, *num_devs is set to the number of
+ * devices in the returned array. The caller must free the returned
+ * array using kfree().
*
* If the filesystem can use multiple block devices (other than block
* devices that aren't used for encrypted file contents, such as
* external journal devices), and wants to support inline encryption,
* then it must implement this function. Otherwise it's not needed.
*/
- int (*get_num_devices)(struct super_block *sb);
-
- /*
- * If ->get_num_devices() returns a value greater than 1, then this
- * function is called to get the array of request_queues that the
- * filesystem is using -- one per block device. (There may be duplicate
- * entries in this array, as block devices can share a request_queue.)
- */
- void (*get_devices)(struct super_block *sb,
- struct request_queue **devs);
+ struct block_device **(*get_devices)(struct super_block *sb,
+ unsigned int *num_devs);
};
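A hedged sketch of an implementation of the reworked hook for a filesystem with a main device plus one extra data device; the sb_info layout is invented for illustration:

#include <linux/blkdev.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>

struct example_sb_info {
	struct block_device *extra_bdev;	/* placeholder field */
};

static struct block_device **example_get_devices(struct super_block *sb,
						 unsigned int *num_devs)
{
	struct example_sb_info *sbi = sb->s_fs_info;
	struct block_device **devs;

	if (!sbi->extra_bdev)
		return NULL;	/* only sb->s_bdev carries encrypted contents */

	devs = kmalloc_array(2, sizeof(*devs), GFP_KERNEL);
	if (!devs)
		return ERR_PTR(-ENOMEM);

	devs[0] = sb->s_bdev;
	devs[1] = sbi->extra_bdev;
	*num_devs = 2;
	return devs;	/* caller frees the array with kfree() */
}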
static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode)
@@ -295,8 +292,6 @@ int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param,
struct fscrypt_dummy_policy *dummy_policy);
bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1,
const struct fscrypt_dummy_policy *p2);
-int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg,
- struct fscrypt_dummy_policy *dummy_policy);
void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep,
struct super_block *sb);
static inline bool
@@ -312,7 +307,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
}
/* keyring.c */
-void fscrypt_sb_free(struct super_block *sb);
+void fscrypt_destroy_keyring(struct super_block *sb);
int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
int fscrypt_add_test_dummy_key(struct super_block *sb,
const struct fscrypt_dummy_policy *dummy_policy);
@@ -353,7 +348,7 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name);
int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
/* bio.c */
-void fscrypt_decrypt_bio(struct bio *bio);
+bool fscrypt_decrypt_bio(struct bio *bio);
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len);
@@ -526,7 +521,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
}
/* keyring.c */
-static inline void fscrypt_sb_free(struct super_block *sb)
+static inline void fscrypt_destroy_keyring(struct super_block *sb)
{
}
@@ -646,8 +641,9 @@ static inline int fscrypt_d_revalidate(struct dentry *dentry,
}
/* bio.c */
-static inline void fscrypt_decrypt_bio(struct bio *bio)
+static inline bool fscrypt_decrypt_bio(struct bio *bio)
{
+ return true;
}
static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
@@ -768,7 +764,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
bool fscrypt_mergeable_bio_bh(struct bio *bio,
const struct buffer_head *next_bh);
-bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter);
+bool fscrypt_dio_supported(struct inode *inode);
u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks);
@@ -801,11 +797,8 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio,
return true;
}
-static inline bool fscrypt_dio_supported(struct kiocb *iocb,
- struct iov_iter *iter)
+static inline bool fscrypt_dio_supported(struct inode *inode)
{
- const struct inode *inode = file_inode(iocb->ki_filp);
-
return !fscrypt_needs_contents_encryption(inode);
}
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index 7af030fa3c36..40f14e5fed9d 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -22,6 +22,9 @@
*/
#define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
+/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
+#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384
+
/* Verity operations for filesystems */
struct fsverity_operations {
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0b61371e287b..99f1146614c0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -37,9 +37,10 @@ extern void ftrace_boot_snapshot(void);
static inline void ftrace_boot_snapshot(void) { }
#endif
-#ifdef CONFIG_FUNCTION_TRACER
struct ftrace_ops;
struct ftrace_regs;
+
+#ifdef CONFIG_FUNCTION_TRACER
/*
* If the arch's mcount caller does not support all of ftrace's
* features, then it must call an indirect function that
@@ -110,12 +111,11 @@ struct ftrace_regs {
#define arch_ftrace_get_regs(fregs) (&(fregs)->regs)
/*
- * ftrace_instruction_pointer_set() is to be defined by the architecture
- * if to allow setting of the instruction pointer from the ftrace_regs
- * when HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports
- * live kernel patching.
+ * ftrace_regs_set_instruction_pointer() is to be defined by the architecture
+ * if to allow setting of the instruction pointer from the ftrace_regs when
+ * HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports live kernel patching.
*/
-#define ftrace_instruction_pointer_set(fregs, ip) do { } while (0)
+#define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs)
@@ -126,6 +126,35 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
return arch_ftrace_get_regs(fregs);
}
+/*
+ * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
+ * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.
+ */
+static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs)
+{
+ if (IS_ENABLED(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS))
+ return true;
+
+ return ftrace_get_regs(fregs) != NULL;
+}
+
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+#define ftrace_regs_get_instruction_pointer(fregs) \
+ instruction_pointer(ftrace_get_regs(fregs))
+#define ftrace_regs_get_argument(fregs, n) \
+ regs_get_kernel_argument(ftrace_get_regs(fregs), n)
+#define ftrace_regs_get_stack_pointer(fregs) \
+ kernel_stack_pointer(ftrace_get_regs(fregs))
+#define ftrace_regs_return_value(fregs) \
+ regs_return_value(ftrace_get_regs(fregs))
+#define ftrace_regs_set_return_value(fregs, ret) \
+ regs_set_return_value(ftrace_get_regs(fregs), ret)
+#define ftrace_override_function_with_return(fregs) \
+ override_function_with_return(ftrace_get_regs(fregs))
+#define ftrace_regs_query_register_offset(name) \
+ regs_query_register_offset(name)
+#endif
+
typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
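
The new ftrace_regs_*() accessors let a callback read arguments without first converting to a full pt_regs. A rough sketch of a callback using them, assuming the architecture actually provides argument access (ftrace_regs_has_args() guards that); the callback name is illustrative and the ops registration is omitted.

/* Illustrative ftrace callback using the new ftrace_regs accessors. */
static void my_trace_callback(unsigned long ip, unsigned long parent_ip,
			      struct ftrace_ops *op, struct ftrace_regs *fregs)
{
	unsigned long arg0;

	/* Only touch fregs when the accessors are actually usable. */
	if (!ftrace_regs_has_args(fregs))
		return;

	arg0 = ftrace_regs_get_argument(fregs, 0);
	pr_debug("traced %ps, first argument %lx\n", (void *)ip, arg0);
}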
@@ -427,9 +456,7 @@ static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsi
{
return -ENODEV;
}
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
-#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/*
* This must be implemented by the architecture.
* It is the way the ftrace direct_ops helper, when called
@@ -443,9 +470,9 @@ static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsi
* the return from the trampoline jump to the direct caller
* instead of going back to the function it just traced.
*/
-static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs,
+static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
unsigned long addr) { }
-#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
#ifdef CONFIG_STACK_TRACER
@@ -1122,47 +1149,6 @@ static inline void unpause_graph_tracing(void) { }
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_TRACING
-
-/* flags for current->trace */
-enum {
- TSK_TRACE_FL_TRACE_BIT = 0,
- TSK_TRACE_FL_GRAPH_BIT = 1,
-};
-enum {
- TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
- TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
-};
-
-static inline void set_tsk_trace_trace(struct task_struct *tsk)
-{
- set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
-}
-
-static inline void clear_tsk_trace_trace(struct task_struct *tsk)
-{
- clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
-}
-
-static inline int test_tsk_trace_trace(struct task_struct *tsk)
-{
- return tsk->trace & TSK_TRACE_FL_TRACE;
-}
-
-static inline void set_tsk_trace_graph(struct task_struct *tsk)
-{
- set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
-}
-
-static inline void clear_tsk_trace_graph(struct task_struct *tsk)
-{
- clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
-}
-
-static inline int test_tsk_trace_graph(struct task_struct *tsk)
-{
- return tsk->trace & TSK_TRACE_FL_GRAPH;
-}
-
enum ftrace_dump_mode;
extern enum ftrace_dump_mode ftrace_dump_on_oops;
diff --git a/include/linux/gameport.h b/include/linux/gameport.h
index 69081d899492..8c2f00018e89 100644
--- a/include/linux/gameport.h
+++ b/include/linux/gameport.h
@@ -110,7 +110,7 @@ static inline void gameport_free_port(struct gameport *gameport)
static inline void gameport_set_name(struct gameport *gameport, const char *name)
{
- strlcpy(gameport->name, name, sizeof(gameport->name));
+ strscpy(gameport->name, name, sizeof(gameport->name));
}
/*
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 939b1a8f571b..4a4b387181ad 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -294,6 +294,7 @@ static struct genl_family ZZZ_genl_family __ro_after_init = {
.ops = ZZZ_genl_ops,
.n_ops = ARRAY_SIZE(ZZZ_genl_ops),
.mcgrps = ZZZ_genl_mcgrps,
+ .resv_start_op = 42, /* drbd is currently the only user */
.n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps),
.module = THIS_MODULE,
};
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f314be58fa77..65a78773dcca 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -18,6 +18,9 @@ static inline int gfp_migratetype(const gfp_t gfp_flags)
VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
+ BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE);
+ BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >>
+ GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC);
if (unlikely(page_group_by_mobility_disabled))
return MIGRATE_UNMOVABLE;
@@ -33,29 +36,6 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
}
-/**
- * gfpflags_normal_context - is gfp_flags a normal sleepable context?
- * @gfp_flags: gfp_flags to test
- *
- * Test whether @gfp_flags indicates that the allocation is from the
- * %current context and allowed to sleep.
- *
- * An allocation being allowed to block doesn't mean it owns the %current
- * context. When direct reclaim path tries to allocate memory, the
- * allocation context is nested inside whatever %current was doing at the
- * time of the original allocation. The nested allocation may be allowed
- * to block but modifying anything %current owns can corrupt the outer
- * context's expectations.
- *
- * %true result from this function indicates that the allocation context
- * can sleep and use anything that's associated with %current.
- */
-static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
-{
- return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
- __GFP_DIRECT_RECLAIM;
-}
-
#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
@@ -230,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p
return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
}
+static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
+{
+ gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN);
+
+ if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN))
+ return;
+
+ if (node_online(this_node))
+ return;
+
+ pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node);
+ dump_stack();
+}
+
/*
* Allocate pages, preferring the node given as nid. The node must be valid and
* online. For more general interface, see alloc_pages_node().
@@ -238,7 +232,7 @@ static inline struct page *
__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
- VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));
+ warn_if_node_offline(nid, gfp_mask);
return __alloc_pages(gfp_mask, order, nid, NULL);
}
@@ -247,7 +241,7 @@ static inline
struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
- VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
+ warn_if_node_offline(nid, gfp);
return __folio_alloc(gfp, order, nid, NULL);
}
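
As the hunks above show, the old one-shot VM_WARN_ON() is replaced by warn_if_node_offline(), which emits a pr_warn() plus stack dump when an allocation passes both __GFP_THISNODE and __GFP_NOWARN yet targets an offline node. A small sketch of a caller that would now hit that path; the helper name is made up.

/* Sketch: node-bound page allocation with the new warning behaviour. */
#include <linux/gfp.h>

static struct page *grab_page_on_node(int nid)
{
	/*
	 * With __GFP_THISNODE | __GFP_NOWARN, warn_if_node_offline() warns
	 * and dumps a stack trace if @nid is offline, instead of the old
	 * VM_WARN_ON().
	 */
	return __alloc_pages_node(nid,
				  GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN, 0);
}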
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index a370387fa406..346f60bbab30 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -98,9 +98,9 @@ int devm_gpio_request_one(struct device *dev, unsigned gpio,
#else /* ! CONFIG_GPIOLIB */
+#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/bug.h>
struct device;
struct gpio_chip;
diff --git a/include/linux/gpio/aspeed.h b/include/linux/gpio/aspeed.h
index 1bfb3cdc86d0..9a547e66c8c4 100644
--- a/include/linux/gpio/aspeed.h
+++ b/include/linux/gpio/aspeed.h
@@ -1,6 +1,10 @@
#ifndef __GPIO_ASPEED_H
#define __GPIO_ASPEED_H
+#include <linux/types.h>
+
+struct gpio_desc;
+
struct aspeed_gpio_copro_ops {
int (*request_access)(void *data);
int (*release_access)(void *data);
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index fe0f460d9a3b..36460ced060b 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -174,10 +174,6 @@ int desc_to_gpio(const struct gpio_desc *desc);
/* Child properties interface */
struct fwnode_handle;
-struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
- const char *propname, int index,
- enum gpiod_flags dflags,
- const char *label);
struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode,
const char *con_id, int index,
enum gpiod_flags flags,
@@ -554,15 +550,6 @@ static inline int desc_to_gpio(const struct gpio_desc *desc)
struct fwnode_handle;
static inline
-struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
- const char *propname, int index,
- enum gpiod_flags dflags,
- const char *label)
-{
- return ERR_PTR(-ENOSYS);
-}
-
-static inline
struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode,
const char *con_id, int index,
enum gpiod_flags flags,
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 6aeea1071b1b..44783fc16125 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -7,8 +7,8 @@
#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>
#include <linux/lockdep.h>
-#include <linux/pinctrl/pinctrl.h>
#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinctrl.h>
#include <linux/property.h>
#include <linux/types.h>
@@ -27,7 +27,7 @@ struct gpio_chip;
union gpio_irq_fwspec {
struct irq_fwspec fwspec;
-#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+#ifdef CONFIG_GENERIC_MSI_IRQ
msi_alloc_info_t msiinfo;
#endif
};
diff --git a/include/linux/gpio/gpio-reg.h b/include/linux/gpio/gpio-reg.h
index 39b888c40b39..3913b6660ed1 100644
--- a/include/linux/gpio/gpio-reg.h
+++ b/include/linux/gpio/gpio-reg.h
@@ -2,9 +2,13 @@
#ifndef GPIO_REG_H
#define GPIO_REG_H
+#include <linux/types.h>
+
struct device;
struct irq_domain;
+struct gpio_chip;
+
struct gpio_chip *gpio_reg_init(struct device *dev, void __iomem *reg,
int base, int num, const char *label, u32 direction, u32 def_out,
const char *const *names, struct irq_domain *irqdom, const int *irqs);
diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index 0b619eb7ae83..44e5f162973e 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -3,7 +3,6 @@
#define __LINUX_GPIO_MACHINE_H
#include <linux/types.h>
-#include <linux/list.h>
enum gpio_lookup_flags {
GPIO_ACTIVE_HIGH = (0 << 0),
diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h
index c8ec982ff498..2f4dcc8d060e 100644
--- a/include/linux/hdmi.h
+++ b/include/linux/hdmi.h
@@ -336,7 +336,12 @@ ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame,
void *buffer, size_t size);
ssize_t hdmi_audio_infoframe_pack_only(const struct hdmi_audio_infoframe *frame,
void *buffer, size_t size);
-int hdmi_audio_infoframe_check(struct hdmi_audio_infoframe *frame);
+int hdmi_audio_infoframe_check(const struct hdmi_audio_infoframe *frame);
+
+struct dp_sdp;
+ssize_t
+hdmi_audio_infoframe_pack_for_dp(const struct hdmi_audio_infoframe *frame,
+ struct dp_sdp *sdp, u8 dp_version);
enum hdmi_3d_structure {
HDMI_3D_STRUCTURE_INVALID = -1,
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 4363a63b9775..8677ae38599e 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -314,15 +314,6 @@ struct hid_item {
#define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065
#define HID_VD_ASUS_CUSTOM_MEDIA_KEYS 0xff310076
-/*
- * HID report types --- Ouch! HID spec says 1 2 3!
- */
-
-#define HID_INPUT_REPORT 0
-#define HID_OUTPUT_REPORT 1
-#define HID_FEATURE_REPORT 2
-
-#define HID_REPORT_TYPES 3
/*
* HID connect requests
@@ -509,7 +500,7 @@ struct hid_report {
struct list_head hidinput_list;
struct list_head field_entry_list; /* ordered list of input fields */
unsigned int id; /* id of this report */
- unsigned int type; /* report type */
+ enum hid_report_type type; /* report type */
unsigned int application; /* application usage for this report */
struct hid_field *field[HID_MAX_FIELDS]; /* fields of the report */
struct hid_field_entry *field_entries; /* allocated memory of input field_entry */
@@ -658,6 +649,8 @@ struct hid_device { /* device report descriptor */
struct list_head debug_list;
spinlock_t debug_list_lock;
wait_queue_head_t debug_wait;
+
+ unsigned int id; /* system unique id */
};
#define to_hid_device(pdev) \
@@ -924,20 +917,21 @@ extern int hidinput_connect(struct hid_device *hid, unsigned int force);
extern void hidinput_disconnect(struct hid_device *);
int hid_set_field(struct hid_field *, unsigned, __s32);
-int hid_input_report(struct hid_device *, int type, u8 *, u32, int);
+int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
+ int interrupt);
struct hid_field *hidinput_get_led_field(struct hid_device *hid);
unsigned int hidinput_count_leds(struct hid_device *hid);
__s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code);
void hid_output_report(struct hid_report *report, __u8 *data);
-int __hid_request(struct hid_device *hid, struct hid_report *rep, int reqtype);
+int __hid_request(struct hid_device *hid, struct hid_report *rep, enum hid_class_request reqtype);
u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags);
struct hid_device *hid_allocate_device(void);
struct hid_report *hid_register_report(struct hid_device *device,
- unsigned int type, unsigned int id,
+ enum hid_report_type type, unsigned int id,
unsigned int application);
int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
struct hid_report *hid_validate_values(struct hid_device *hid,
- unsigned int type, unsigned int id,
+ enum hid_report_type type, unsigned int id,
unsigned int field_index,
unsigned int report_counts);
@@ -1106,10 +1100,11 @@ void hid_hw_stop(struct hid_device *hdev);
int __must_check hid_hw_open(struct hid_device *hdev);
void hid_hw_close(struct hid_device *hdev);
void hid_hw_request(struct hid_device *hdev,
- struct hid_report *report, int reqtype);
+ struct hid_report *report, enum hid_class_request reqtype);
int hid_hw_raw_request(struct hid_device *hdev,
unsigned char reportnum, __u8 *buf,
- size_t len, unsigned char rtype, int reqtype);
+ size_t len, enum hid_report_type rtype,
+ enum hid_class_request reqtype);
int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len);
/**
@@ -1137,7 +1132,7 @@ static inline int hid_hw_power(struct hid_device *hdev, int level)
* @reqtype: hid request type
*/
static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle,
- int reqtype)
+ enum hid_class_request reqtype)
{
if (hdev->ll_driver->idle)
return hdev->ll_driver->idle(hdev, report, idle, reqtype);
@@ -1182,8 +1177,8 @@ static inline u32 hid_report_len(struct hid_report *report)
return DIV_ROUND_UP(report->size, 8) + (report->id > 0);
}
-int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
- int interrupt);
+int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
+ int interrupt);
/* HID quirks API */
unsigned long hid_lookup_quirk(const struct hid_device *hdev);
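
Since the report type and class request are now enums rather than bare ints, call sites read more clearly and get some type checking. A minimal, hypothetical feature-report fetch with the typed API; the driver helper name, report ID and buffer handling are placeholders.

/* Hypothetical driver helper fetching a feature report with the typed API. */
static int mydrv_get_feature(struct hid_device *hdev, u8 report_id,
			     u8 *buf, size_t len)
{
	int ret;

	ret = hid_hw_raw_request(hdev, report_id, buf, len,
				 HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
	return ret < 0 ? ret : 0;
}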
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 25679035ca28..44242268f53b 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/cacheflush.h>
+#include <linux/kmsan.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
@@ -311,12 +312,39 @@ static inline void copy_user_highpage(struct page *to, struct page *from,
vfrom = kmap_local_page(from);
vto = kmap_local_page(to);
copy_user_page(vto, vfrom, vaddr, to);
+ kmsan_unpoison_memory(page_address(to), PAGE_SIZE);
kunmap_local(vto);
kunmap_local(vfrom);
}
#endif
+#ifdef copy_mc_to_kernel
+static inline int copy_mc_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
+{
+ unsigned long ret;
+ char *vfrom, *vto;
+
+ vfrom = kmap_local_page(from);
+ vto = kmap_local_page(to);
+ ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE);
+ if (!ret)
+ kmsan_unpoison_memory(page_address(to), PAGE_SIZE);
+ kunmap_local(vto);
+ kunmap_local(vfrom);
+
+ return ret;
+}
+#else
+static inline int copy_mc_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
+{
+ copy_user_highpage(to, from, vaddr, vma);
+ return 0;
+}
+#endif
+
#ifndef __HAVE_ARCH_COPY_HIGHPAGE
static inline void copy_highpage(struct page *to, struct page *from)
@@ -326,6 +354,7 @@ static inline void copy_highpage(struct page *to, struct page *from)
vfrom = kmap_local_page(from);
vto = kmap_local_page(to);
copy_page(vto, vfrom);
+ kmsan_copy_page_meta(to, from);
kunmap_local(vto);
kunmap_local(vfrom);
}
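
copy_mc_user_highpage() returns nonzero if the source page takes a machine-check during the copy, and degrades to a plain copy_user_highpage() on architectures without copy_mc_to_kernel(). A rough caller sketch under the assumption that the caller wants to abort on poisoned source data; the surrounding fault/migration context is omitted.

/* Sketch: copying a user page while tolerating memory failure on the source. */
#include <linux/highmem.h>

static int copy_user_page_mc(struct page *dst, struct page *src,
			     unsigned long addr, struct vm_area_struct *vma)
{
	if (copy_mc_user_highpage(dst, src, addr, vma)) {
		/* Source page had an uncorrectable error; let the caller bail. */
		return -EHWPOISON;
	}
	return 0;
}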
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 116e8bd68c99..be3aedaa96dc 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -87,29 +87,6 @@
#define PEH_AXUSER_CFG 0x401001
#define PEH_AXUSER_CFG_ENABLE 0xffffffff
-#define QM_AXI_RRESP BIT(0)
-#define QM_AXI_BRESP BIT(1)
-#define QM_ECC_MBIT BIT(2)
-#define QM_ECC_1BIT BIT(3)
-#define QM_ACC_GET_TASK_TIMEOUT BIT(4)
-#define QM_ACC_DO_TASK_TIMEOUT BIT(5)
-#define QM_ACC_WB_NOT_READY_TIMEOUT BIT(6)
-#define QM_SQ_CQ_VF_INVALID BIT(7)
-#define QM_CQ_VF_INVALID BIT(8)
-#define QM_SQ_VF_INVALID BIT(9)
-#define QM_DB_TIMEOUT BIT(10)
-#define QM_OF_FIFO_OF BIT(11)
-#define QM_DB_RANDOM_INVALID BIT(12)
-#define QM_MAILBOX_TIMEOUT BIT(13)
-#define QM_FLR_TIMEOUT BIT(14)
-
-#define QM_BASE_NFE (QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \
- QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \
- QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID | \
- QM_MAILBOX_TIMEOUT | QM_FLR_TIMEOUT)
-#define QM_BASE_CE QM_ECC_1BIT
-
-#define QM_Q_DEPTH 1024
#define QM_MIN_QNUM 2
#define HISI_ACC_SGL_SGE_NR_MAX 255
#define QM_SHAPER_CFG 0x100164
@@ -168,6 +145,15 @@ enum qm_vf_state {
QM_NOT_READY,
};
+enum qm_cap_bits {
+ QM_SUPPORT_DB_ISOLATION = 0x0,
+ QM_SUPPORT_FUNC_QOS,
+ QM_SUPPORT_STOP_QP,
+ QM_SUPPORT_MB_COMMAND,
+ QM_SUPPORT_SVA_PREFETCH,
+ QM_SUPPORT_RPM,
+};
+
struct dfx_diff_registers {
u32 *regs;
u32 reg_offset;
@@ -232,7 +218,10 @@ struct hisi_qm_err_info {
char *acpi_rst;
u32 msi_wr_port;
u32 ecc_2bits_mask;
- u32 dev_ce_mask;
+ u32 qm_shutdown_mask;
+ u32 dev_shutdown_mask;
+ u32 qm_reset_mask;
+ u32 dev_reset_mask;
u32 ce;
u32 nfe;
u32 fe;
@@ -258,6 +247,18 @@ struct hisi_qm_err_ini {
void (*err_info_init)(struct hisi_qm *qm);
};
+struct hisi_qm_cap_info {
+ u32 type;
+ /* Register offset */
+ u32 offset;
+ /* Bit offset in register */
+ u32 shift;
+ u32 mask;
+ u32 v1_val;
+ u32 v2_val;
+ u32 v3_val;
+};
+
struct hisi_qm_list {
struct mutex lock;
struct list_head list;
@@ -278,6 +279,9 @@ struct hisi_qm {
struct pci_dev *pdev;
void __iomem *io_base;
void __iomem *db_io_base;
+
+ /* Capability version, 0: not supported */
+ u32 cap_ver;
u32 sqe_size;
u32 qp_base;
u32 qp_num;
@@ -286,6 +290,8 @@ struct hisi_qm {
u32 max_qp_num;
u32 vfs_num;
u32 db_interval;
+ u16 eq_depth;
+ u16 aeq_depth;
struct list_head list;
struct hisi_qm_list *qm_list;
@@ -304,6 +310,8 @@ struct hisi_qm {
struct hisi_qm_err_info err_info;
struct hisi_qm_err_status err_status;
unsigned long misc_ctl; /* driver removing and reset sched */
+ /* Device capability bit */
+ unsigned long caps;
struct rw_semaphore qps_lock;
struct idr qp_idr;
@@ -326,8 +334,6 @@ struct hisi_qm {
bool use_sva;
bool is_frozen;
- /* doorbell isolation enable */
- bool use_db_isolation;
resource_size_t phys_base;
resource_size_t db_phys_base;
struct uacce_device *uacce;
@@ -351,6 +357,8 @@ struct hisi_qp_ops {
struct hisi_qp {
u32 qp_id;
+ u16 sq_depth;
+ u16 cq_depth;
u8 alg_type;
u8 req_type;
@@ -376,14 +384,14 @@ struct hisi_qp {
static inline int q_num_set(const char *val, const struct kernel_param *kp,
unsigned int device)
{
- struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
- device, NULL);
+ struct pci_dev *pdev;
u32 n, q_num;
int ret;
if (!val)
return -EINVAL;
+ pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL);
if (!pdev) {
q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
pr_info("No device found currently, suppose queue number is %u\n",
@@ -393,6 +401,8 @@ static inline int q_num_set(const char *val, const struct kernel_param *kp,
q_num = QM_QNUM_V1;
else
q_num = QM_QNUM_V2;
+
+ pci_dev_put(pdev);
}
ret = kstrtou32(val, 10, &n);
@@ -461,11 +471,11 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen);
int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
void hisi_qm_dev_err_init(struct hisi_qm *qm);
void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
-int hisi_qm_diff_regs_init(struct hisi_qm *qm,
- struct dfx_diff_registers *dregs, int reg_len);
-void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len);
+int hisi_qm_regs_debugfs_init(struct hisi_qm *qm,
+ struct dfx_diff_registers *dregs, u32 reg_len);
+void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len);
void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
- struct dfx_diff_registers *dregs, int regs_len);
+ struct dfx_diff_registers *dregs, u32 regs_len);
pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
@@ -501,6 +511,9 @@ void hisi_qm_pm_init(struct hisi_qm *qm);
int hisi_qm_get_dfx_access(struct hisi_qm *qm);
void hisi_qm_put_dfx_access(struct hisi_qm *qm);
void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
+u32 hisi_qm_get_hw_info(struct hisi_qm *qm,
+ const struct hisi_qm_cap_info *info_table,
+ u32 index, bool is_read);
/* Used by VFIO ACC live migration driver */
struct pci_driver *hisi_sec_get_pf_driver(void);
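
Judging from the new struct hisi_qm_cap_info layout, hisi_qm_get_hw_info() resolves a capability either from a device register (when is_read is true) or from the per-hardware-version default values. A hedged sketch of a driver-side table and lookup; the register offset, shift and mask below are invented purely for illustration.

/* Illustrative capability table; offsets/masks here are made up. */
static const struct hisi_qm_cap_info example_cap_info[] = {
	/* type, offset, shift, mask, v1_val, v2_val, v3_val */
	{ QM_SUPPORT_STOP_QP, 0x3100, 9, 0x1, 0, 0, 1 },
};

static void example_init_caps(struct hisi_qm *qm)
{
	/* is_read = true: assume the value should come from the register. */
	u32 val = hisi_qm_get_hw_info(qm, example_cap_info, 0, true);

	if (val)
		set_bit(QM_SUPPORT_STOP_QP, &qm->caps);
}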
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index cb2100d9b0ff..dc55d9d3b94f 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -469,11 +469,13 @@ struct host1x_memory_context {
#ifdef CONFIG_IOMMU_API
struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+ struct device *dev,
struct pid *pid);
void host1x_memory_context_get(struct host1x_memory_context *cd);
void host1x_memory_context_put(struct host1x_memory_context *cd);
#else
static inline struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+ struct device *dev,
struct pid *pid)
{
return NULL;
diff --git a/include/linux/hp_sdc.h b/include/linux/hp_sdc.h
index 6f1dee7e67e0..9be8704e2d38 100644
--- a/include/linux/hp_sdc.h
+++ b/include/linux/hp_sdc.h
@@ -180,7 +180,7 @@ switch (val) { \
#define HP_SDC_CMD_SET_IM 0x40 /* 010xxxxx == set irq mask */
-/* The documents provided do not explicitly state that all registers betweem
+/* The documents provided do not explicitly state that all registers between
* 0x01 and 0x1f inclusive can be read by sending their register index as a
* command, but this is implied and appears to be the case.
*/
diff --git a/include/linux/hpet.h b/include/linux/hpet.h
index 8604564b985d..21e69eaf7a36 100644
--- a/include/linux/hpet.h
+++ b/include/linux/hpet.h
@@ -30,7 +30,7 @@ struct hpet {
unsigned long _hpet_compare;
} _u1;
u64 hpet_fsb[2]; /* FSB route */
- } hpet_timers[1];
+ } hpet_timers[];
};
#define hpet_mc _u0._hpet_mc
diff --git a/include/linux/htcpld.h b/include/linux/htcpld.h
index 842fce69ac06..5f8ac9b1d724 100644
--- a/include/linux/htcpld.h
+++ b/include/linux/htcpld.h
@@ -13,8 +13,6 @@ struct htcpld_chip_platform_data {
};
struct htcpld_core_platform_data {
- unsigned int int_reset_gpio_hi;
- unsigned int int_reset_gpio_lo;
unsigned int i2c_adapter_id;
struct htcpld_chip_platform_data *chip;
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 768e5261fdae..a1341fdcf666 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -168,9 +168,8 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
!inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
}
-bool hugepage_vma_check(struct vm_area_struct *vma,
- unsigned long vm_flags,
- bool smaps, bool in_pf);
+bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
+ bool smaps, bool in_pf, bool enforce_sysfs);
#define transparent_hugepage_use_zero_page() \
(transparent_hugepage_flags & \
@@ -219,6 +218,9 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
int advice);
+int madvise_collapse(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end);
void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
unsigned long end, long adjust_next);
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
@@ -321,8 +323,8 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
}
static inline bool hugepage_vma_check(struct vm_area_struct *vma,
- unsigned long vm_flags,
- bool smaps, bool in_pf)
+ unsigned long vm_flags, bool smaps,
+ bool in_pf, bool enforce_sysfs)
{
return false;
}
@@ -362,9 +364,16 @@ static inline void split_huge_pmd_address(struct vm_area_struct *vma,
static inline int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
- BUG();
- return 0;
+ return -EINVAL;
+}
+
+static inline int madvise_collapse(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end)
+{
+ return -EINVAL;
}
+
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
@@ -435,6 +444,11 @@ static inline int split_folio_to_list(struct folio *folio,
return split_huge_page_to_list(&folio->page, list);
}
+static inline int split_folio(struct folio *folio)
+{
+ return split_folio_to_list(folio, NULL);
+}
+
/*
* archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
* limitations in the implementation like arm64 MTE can override this to
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3ec981a0d8b3..551834cd5299 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -16,8 +16,9 @@
struct ctl_table;
struct user_struct;
struct mmu_gather;
+struct node;
-#ifndef is_hugepd
+#ifndef CONFIG_ARCH_HAS_HUGEPD
typedef struct { unsigned long pd; } hugepd_t;
#define is_hugepd(hugepd) (0)
#define __hugepd(x) ((hugepd_t) { (x) })
@@ -32,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t;
/*
* For HugeTLB page, there are more metadata to save in the struct page. But
* the head struct page cannot meet our needs, so we have to abuse other tail
- * struct page to store the metadata. In order to avoid conflicts caused by
- * subsequent use of more tail struct pages, we gather these discrete indexes
- * of tail struct page here.
+ * struct page to store the metadata.
*/
-enum {
- SUBPAGE_INDEX_SUBPOOL = 1, /* reuse page->private */
-#ifdef CONFIG_CGROUP_HUGETLB
- SUBPAGE_INDEX_CGROUP, /* reuse page->private */
- SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */
- __MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
- SUBPAGE_INDEX_HWPOISON,
-#endif
- __NR_USED_SUBPAGE,
-};
+#define __NR_USED_SUBPAGE 3
struct hugepage_subpool {
spinlock_t lock;
@@ -114,6 +102,12 @@ struct file_region {
#endif
};
+struct hugetlb_vma_lock {
+ struct kref refs;
+ struct rw_semaphore rw_sema;
+ struct vm_area_struct *vma;
+};
+
extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);
@@ -126,7 +120,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
long min_hpages);
void hugepage_put_subpool(struct hugepage_subpool *spool);
-void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
+void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *);
int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *,
@@ -142,6 +136,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
struct vm_area_struct *, struct vm_area_struct *);
+struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags);
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
unsigned long *, unsigned long *, long, unsigned int,
@@ -174,10 +170,11 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
int isolate_hugetlb(struct page *page, struct list_head *list);
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+ bool *migratable_cleared);
void putback_active_hugepage(struct page *page);
-void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
+void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void free_huge_page(struct page *page);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
@@ -202,17 +199,14 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
- int write);
-struct page *follow_huge_pd(struct vm_area_struct *vma,
- unsigned long address, hugepd_t hpd,
- int flags, int pdshift);
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int flags);
-struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int flags);
-struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
- pgd_t *pgd, int flags);
+
+void hugetlb_vma_lock_read(struct vm_area_struct *vma);
+void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
+void hugetlb_vma_lock_write(struct vm_area_struct *vma);
+void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
+int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
+void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
+void hugetlb_vma_lock_release(struct kref *kref);
int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pud);
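
The new per-VMA hugetlb lock gives walkers a read-side lock to hold around page-table walks that must not race with PMD unsharing. A minimal sketch of the read-side usage; the walk itself is elided.

/* Sketch: protecting a hugetlb page-table walk against pmd unsharing. */
static void walk_hugetlb_range_locked(struct vm_area_struct *vma)
{
	hugetlb_vma_lock_read(vma);
	/* ... walk huge_pte entries; huge_pmd_unshare() is excluded here ... */
	hugetlb_vma_unlock_read(vma);
}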
@@ -225,7 +219,7 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
-static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
}
@@ -257,6 +251,12 @@ static inline void adjust_range_if_pmd_sharing_possible(
{
}
+static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
+{
+ BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
+}
+
static inline long follow_hugetlb_page(struct mm_struct *mm,
struct vm_area_struct *vma, struct page **pages,
struct vm_area_struct **vmas, unsigned long *position,
@@ -267,12 +267,6 @@ static inline long follow_hugetlb_page(struct mm_struct *mm,
return 0;
}
-static inline struct page *follow_huge_addr(struct mm_struct *mm,
- unsigned long address, int write)
-{
- return ERR_PTR(-EINVAL);
-}
-
static inline int copy_hugetlb_page_range(struct mm_struct *dst,
struct mm_struct *src,
struct vm_area_struct *dst_vma,
@@ -305,35 +299,35 @@ static inline void hugetlb_show_meminfo_node(int nid)
{
}
-static inline struct page *follow_huge_pd(struct vm_area_struct *vma,
- unsigned long address, hugepd_t hpd, int flags,
- int pdshift)
+static inline int prepare_hugepage_range(struct file *file,
+ unsigned long addr, unsigned long len)
{
- return NULL;
+ return -EINVAL;
}
-static inline struct page *follow_huge_pmd(struct mm_struct *mm,
- unsigned long address, pmd_t *pmd, int flags)
+static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
- return NULL;
}
-static inline struct page *follow_huge_pud(struct mm_struct *mm,
- unsigned long address, pud_t *pud, int flags)
+static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
- return NULL;
}
-static inline struct page *follow_huge_pgd(struct mm_struct *mm,
- unsigned long address, pgd_t *pgd, int flags)
+static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
- return NULL;
}
-static inline int prepare_hugepage_range(struct file *file,
- unsigned long addr, unsigned long len)
+static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
+{
+}
+
+static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
+{
+ return 1;
+}
+
+static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
- return -EINVAL;
}
static inline int pmd_huge(pmd_t pmd)
@@ -385,12 +379,13 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list)
return -EBUSY;
}
-static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
{
return 0;
}
-static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+ bool *migratable_cleared)
{
return 0;
}
@@ -399,8 +394,8 @@ static inline void putback_active_hugepage(struct page *page)
{
}
-static inline void move_hugetlb_state(struct page *oldpage,
- struct page *newpage, int reason)
+static inline void move_hugetlb_state(struct folio *old_folio,
+ struct folio *new_folio, int reason)
{
}
@@ -583,26 +578,50 @@ enum hugetlb_page_flags {
*/
#ifdef CONFIG_HUGETLB_PAGE
#define TESTHPAGEFLAG(uname, flname) \
+static __always_inline \
+bool folio_test_hugetlb_##flname(struct folio *folio) \
+ { void *private = &folio->private; \
+ return test_bit(HPG_##flname, private); \
+ } \
static inline int HPage##uname(struct page *page) \
{ return test_bit(HPG_##flname, &(page->private)); }
#define SETHPAGEFLAG(uname, flname) \
+static __always_inline \
+void folio_set_hugetlb_##flname(struct folio *folio) \
+ { void *private = &folio->private; \
+ set_bit(HPG_##flname, private); \
+ } \
static inline void SetHPage##uname(struct page *page) \
{ set_bit(HPG_##flname, &(page->private)); }
#define CLEARHPAGEFLAG(uname, flname) \
+static __always_inline \
+void folio_clear_hugetlb_##flname(struct folio *folio) \
+ { void *private = &folio->private; \
+ clear_bit(HPG_##flname, private); \
+ } \
static inline void ClearHPage##uname(struct page *page) \
{ clear_bit(HPG_##flname, &(page->private)); }
#else
#define TESTHPAGEFLAG(uname, flname) \
+static inline bool \
+folio_test_hugetlb_##flname(struct folio *folio) \
+ { return 0; } \
static inline int HPage##uname(struct page *page) \
{ return 0; }
#define SETHPAGEFLAG(uname, flname) \
+static inline void \
+folio_set_hugetlb_##flname(struct folio *folio) \
+ { } \
static inline void SetHPage##uname(struct page *page) \
{ }
#define CLEARHPAGEFLAG(uname, flname) \
+static inline void \
+folio_clear_hugetlb_##flname(struct folio *folio) \
+ { } \
static inline void ClearHPage##uname(struct page *page) \
{ }
#endif
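
Each hugetlb page flag now also gets folio accessors generated next to the page-based ones. Assuming an existing flag HPG_freed declared as TESTHPAGEFLAG(Freed, freed) (the flag list itself is outside this hunk), the macro would now expand roughly to:

/* What TESTHPAGEFLAG(Freed, freed) now generates (simplified). */
static __always_inline bool folio_test_hugetlb_freed(struct folio *folio)
{
	void *private = &folio->private;

	return test_bit(HPG_freed, private);
}

static inline int HPageFreed(struct page *page)
{
	return test_bit(HPG_freed, &page->private);
}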
@@ -665,7 +684,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
-int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
+int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
unsigned long address, struct page *page);
@@ -688,18 +707,29 @@ extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx])
+static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
+{
+ return folio->_hugetlb_subpool;
+}
+
/*
- * hugetlb page subpool pointer located in hpage[1].private
+ * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
*/
static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
{
- return (void *)page_private(hpage + SUBPAGE_INDEX_SUBPOOL);
+ return hugetlb_folio_subpool(page_folio(hpage));
+}
+
+static inline void hugetlb_set_folio_subpool(struct folio *folio,
+ struct hugepage_subpool *subpool)
+{
+ folio->_hugetlb_subpool = subpool;
}
static inline void hugetlb_set_page_subpool(struct page *hpage,
struct hugepage_subpool *subpool)
{
- set_page_private(hpage + SUBPAGE_INDEX_SUBPOOL, (unsigned long)subpool);
+ hugetlb_set_folio_subpool(page_folio(hpage), subpool);
}
static inline struct hstate *hstate_file(struct file *f)
@@ -783,10 +813,15 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
}
#endif
+static inline struct hstate *folio_hstate(struct folio *folio)
+{
+ VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+ return size_to_hstate(folio_size(folio));
+}
+
static inline struct hstate *page_hstate(struct page *page)
{
- VM_BUG_ON_PAGE(!PageHuge(page), page);
- return size_to_hstate(page_size(page));
+ return folio_hstate(page_folio(page));
}
static inline unsigned hstate_index_to_shift(unsigned index)
@@ -935,9 +970,19 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
}
#endif
+#ifdef CONFIG_NUMA
+void hugetlb_register_node(struct node *node);
+void hugetlb_unregister_node(struct node *node);
+#endif
+
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
+static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
+{
+ return NULL;
+}
+
static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
{
return NULL;
@@ -990,6 +1035,11 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
return NULL;
}
+static inline struct hstate *folio_hstate(struct folio *folio)
+{
+ return NULL;
+}
+
static inline struct hstate *page_hstate(struct page *page)
{
return NULL;
@@ -1109,6 +1159,14 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
}
+
+static inline void hugetlb_register_node(struct node *node)
+{
+}
+
+static inline void hugetlb_unregister_node(struct node *node)
+{
+}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
@@ -1123,14 +1181,10 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h,
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
extern void __init hugetlb_cma_reserve(int order);
-extern void __init hugetlb_cma_check(void);
#else
static inline __init void hugetlb_cma_reserve(int order)
{
}
-static inline __init void hugetlb_cma_check(void)
-{
-}
#endif
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 379344828e78..f706626a8063 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -24,12 +24,10 @@ struct file_region;
#ifdef CONFIG_CGROUP_HUGETLB
/*
* Minimum page order trackable by hugetlb cgroup.
- * At least 4 pages are necessary for all the tracking information.
- * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
- * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
- * is the reservation usage cgroup.
+ * At least 3 pages are necessary for all the tracking information.
+ * The second tail page contains all of the hugetlb-specific fields.
*/
-#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
+#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
enum hugetlb_memory_event {
HUGETLB_MAX,
@@ -67,55 +65,50 @@ struct hugetlb_cgroup {
};
static inline struct hugetlb_cgroup *
-__hugetlb_cgroup_from_page(struct page *page, bool rsvd)
+__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
{
- VM_BUG_ON_PAGE(!PageHuge(page), page);
-
- if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
+ VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+ if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
return NULL;
if (rsvd)
- return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD);
+ return folio->_hugetlb_cgroup_rsvd;
else
- return (void *)page_private(page + SUBPAGE_INDEX_CGROUP);
+ return folio->_hugetlb_cgroup;
}
-static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
{
- return __hugetlb_cgroup_from_page(page, false);
+ return __hugetlb_cgroup_from_folio(folio, false);
}
static inline struct hugetlb_cgroup *
-hugetlb_cgroup_from_page_rsvd(struct page *page)
+hugetlb_cgroup_from_folio_rsvd(struct folio *folio)
{
- return __hugetlb_cgroup_from_page(page, true);
+ return __hugetlb_cgroup_from_folio(folio, true);
}
-static inline int __set_hugetlb_cgroup(struct page *page,
+static inline void __set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg, bool rsvd)
{
- VM_BUG_ON_PAGE(!PageHuge(page), page);
-
- if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
- return -1;
+ VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+ if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
+ return;
if (rsvd)
- set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD,
- (unsigned long)h_cg);
+ folio->_hugetlb_cgroup_rsvd = h_cg;
else
- set_page_private(page + SUBPAGE_INDEX_CGROUP,
- (unsigned long)h_cg);
- return 0;
+ folio->_hugetlb_cgroup = h_cg;
}
-static inline int set_hugetlb_cgroup(struct page *page,
+static inline void set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg)
{
- return __set_hugetlb_cgroup(page, h_cg, false);
+ __set_hugetlb_cgroup(folio, h_cg, false);
}
-static inline int set_hugetlb_cgroup_rsvd(struct page *page,
+static inline void set_hugetlb_cgroup_rsvd(struct folio *folio,
struct hugetlb_cgroup *h_cg)
{
- return __set_hugetlb_cgroup(page, h_cg, true);
+ __set_hugetlb_cgroup(folio, h_cg, true);
}
static inline bool hugetlb_cgroup_disabled(void)
@@ -152,10 +145,10 @@ extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page);
-extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
- struct page *page);
-extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
- struct page *page);
+extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
+ struct folio *folio);
+extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
+ struct folio *folio);
extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
@@ -171,8 +164,8 @@ extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
bool region_del);
extern void hugetlb_cgroup_file_init(void) __init;
-extern void hugetlb_cgroup_migrate(struct page *oldhpage,
- struct page *newhpage);
+extern void hugetlb_cgroup_migrate(struct folio *old_folio,
+ struct folio *new_folio);
#else
static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
@@ -182,33 +175,25 @@ static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
{
}
-static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
{
return NULL;
}
static inline struct hugetlb_cgroup *
-hugetlb_cgroup_from_page_resv(struct page *page)
+hugetlb_cgroup_from_folio_rsvd(struct folio *folio)
{
return NULL;
}
-static inline struct hugetlb_cgroup *
-hugetlb_cgroup_from_page_rsvd(struct page *page)
-{
- return NULL;
-}
-
-static inline int set_hugetlb_cgroup(struct page *page,
+static inline void set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg)
{
- return 0;
}
-static inline int set_hugetlb_cgroup_rsvd(struct page *page,
+static inline void set_hugetlb_cgroup_rsvd(struct folio *folio,
struct hugetlb_cgroup *h_cg)
{
- return 0;
}
static inline bool hugetlb_cgroup_disabled(void)
@@ -256,14 +241,14 @@ hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
{
}
-static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
- struct page *page)
+static inline void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
+ struct folio *folio)
{
}
-static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx,
+static inline void hugetlb_cgroup_uncharge_folio_rsvd(int idx,
unsigned long nr_pages,
- struct page *page)
+ struct folio *folio)
{
}
static inline void hugetlb_cgroup_uncharge_cgroup(int idx,
@@ -288,8 +273,8 @@ static inline void hugetlb_cgroup_file_init(void)
{
}
-static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
- struct page *newhpage)
+static inline void hugetlb_cgroup_migrate(struct folio *old_folio,
+ struct folio *new_folio)
{
}
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 78dd7035d1e5..f319bd26b030 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -74,12 +74,12 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
extern int register_perf_hw_breakpoint(struct perf_event *bp);
extern void unregister_hw_breakpoint(struct perf_event *bp);
extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events);
+extern bool hw_breakpoint_is_used(void);
extern int dbg_reserve_bp_slot(struct perf_event *bp);
extern int dbg_release_bp_slot(struct perf_event *bp);
extern int reserve_bp_slot(struct perf_event *bp);
extern void release_bp_slot(struct perf_event *bp);
-int hw_breakpoint_weight(struct perf_event *bp);
int arch_reserve_bp_slot(struct perf_event *bp);
void arch_release_bp_slot(struct perf_event *bp);
void arch_unregister_hw_breakpoint(struct perf_event *bp);
@@ -121,6 +121,8 @@ register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
static inline void
unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { }
+static inline bool hw_breakpoint_is_used(void) { return false; }
+
static inline int
reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
static inline void release_bp_slot(struct perf_event *bp) { }
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index aa1d4da03538..8a3115516a1b 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -34,7 +34,7 @@
* @priv: Private data, for use by the RNG driver.
* @quality: Estimation of true entropy in RNG's bitstream
* (in bits of entropy per 1024 bits of input;
- * valid values: 1 to 1024, or 0 for unknown).
+ * valid values: 1 to 1024, or 0 for maximum).
*/
struct hwrng {
const char *name;
@@ -50,6 +50,7 @@ struct hwrng {
struct list_head list;
struct kref ref;
struct completion cleanup_done;
+ struct completion dying;
};
struct device;
@@ -61,4 +62,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng);
extern void hwrng_unregister(struct hwrng *rng);
extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
+extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs);
+
#endif /* LINUX_HWRANDOM_H_ */
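
hwrng_msleep() lets a driver's read path sleep between polls while still waking up early if the hwrng is going away (which is presumably what the new 'dying' completion is for). A hypothetical polling ->read() callback; the device-specific helpers are assumptions.

/* Hypothetical ->read() that polls hardware, sleeping via hwrng_msleep(). */
static int myrng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
	while (!myrng_data_ready()) {		/* device-specific check (assumed) */
		if (!wait)
			return 0;
		hwrng_msleep(rng, 10);		/* returns early if the rng is dying */
	}

	return myrng_pull_bytes(buf, max);	/* device-specific (assumed) */
}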
diff --git a/include/linux/hwmon-sysfs.h b/include/linux/hwmon-sysfs.h
index cb26d02f52f3..d896713359cd 100644
--- a/include/linux/hwmon-sysfs.h
+++ b/include/linux/hwmon-sysfs.h
@@ -8,6 +8,7 @@
#define _LINUX_HWMON_SYSFS_H
#include <linux/device.h>
+#include <linux/kstrtox.h>
struct sensor_device_attribute{
struct device_attribute dev_attr;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 3b42264333ef..85f7c5a63aa6 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -969,7 +969,7 @@ struct vmbus_channel {
* mechanism improves throughput by:
*
* A) Making the host more efficient - each time it wakes up,
- * potentially it will process morev number of packets. The
+ * potentially it will process more number of packets. The
+ * potentially it will process more packets. The
* monitor latency allows a batch to build up.
* B) By deferring the hypercall to signal, we will also minimize
* the interrupts.
@@ -1341,6 +1341,8 @@ struct hv_ring_buffer_debug_info {
int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
struct hv_ring_buffer_debug_info *debug_info);
+bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel);
+
/* Vmbus interface */
#define vmbus_driver_register(driver) \
__vmbus_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 8eab5017bff3..d84e0e99f084 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -189,6 +189,7 @@ s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client,
u8 *values);
int i2c_get_device_id(const struct i2c_client *client,
struct i2c_device_identity *id);
+const struct i2c_device_id *i2c_client_get_device_id(const struct i2c_client *client);
#endif /* I2C */
/**
@@ -273,7 +274,7 @@ struct i2c_driver {
/* Standard driver model interfaces */
int (*probe)(struct i2c_client *client, const struct i2c_device_id *id);
- int (*remove)(struct i2c_client *client);
+ void (*remove)(struct i2c_client *client);
/* New driver model interface to aid the seamless removal of the
* current probe()'s, more commonly unused than used second parameter.
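
With ->remove() returning void, drivers drop the dead "return 0" and can no longer pretend removal can fail. A minimal sketch of the new-style callback wired into a driver struct; the chip name and teardown helper are placeholders, and probe() is assumed to be defined elsewhere in the driver.

/* New-style remove callback: no return value to get wrong. */
static void mychip_remove(struct i2c_client *client)
{
	struct mychip *chip = i2c_get_clientdata(client);

	mychip_power_off(chip);		/* driver-specific teardown (assumed) */
}

static struct i2c_driver mychip_driver = {
	.driver	= { .name = "mychip" },
	.probe	= mychip_probe,		/* defined elsewhere in the driver */
	.remove	= mychip_remove,
};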
diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h
index 8242e13e7b0b..1c997abe868c 100644
--- a/include/linux/i3c/device.h
+++ b/include/linux/i3c/device.h
@@ -287,12 +287,15 @@ static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv,
#define module_i3c_i2c_driver(__i3cdrv, __i2cdrv) \
module_driver(__i3cdrv, \
i3c_i2c_driver_register, \
- i3c_i2c_driver_unregister)
+ i3c_i2c_driver_unregister, \
+ __i2cdrv)
int i3c_device_do_priv_xfers(struct i3c_device *dev,
struct i3c_priv_xfer *xfers,
int nxfers);
+int i3c_device_do_setdasa(struct i3c_device *dev);
+
void i3c_device_get_info(struct i3c_device *dev, struct i3c_device_info *info);
struct i3c_ibi_payload {
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 55e6f4ad0ca6..80d6308dea06 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -310,9 +310,11 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2)
struct ieee80211_hdr {
__le16 frame_control;
__le16 duration_id;
- u8 addr1[ETH_ALEN];
- u8 addr2[ETH_ALEN];
- u8 addr3[ETH_ALEN];
+ struct_group(addrs,
+ u8 addr1[ETH_ALEN];
+ u8 addr2[ETH_ALEN];
+ u8 addr3[ETH_ALEN];
+ );
__le16 seq_ctrl;
u8 addr4[ETH_ALEN];
} __packed __aligned(2);
@@ -336,6 +338,17 @@ struct ieee80211_qos_hdr {
__le16 qos_ctrl;
} __packed __aligned(2);
+struct ieee80211_qos_hdr_4addr {
+ __le16 frame_control;
+ __le16 duration_id;
+ u8 addr1[ETH_ALEN];
+ u8 addr2[ETH_ALEN];
+ u8 addr3[ETH_ALEN];
+ __le16 seq_ctrl;
+ u8 addr4[ETH_ALEN];
+ __le16 qos_ctrl;
+} __packed __aligned(2);
+
struct ieee80211_trigger {
__le16 frame_control;
__le16 duration;
@@ -2886,7 +2899,8 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
/* Calculate 802.11be EHT capabilities IE Tx/Rx EHT MCS NSS Support Field size */
static inline u8
ieee80211_eht_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap,
- const struct ieee80211_eht_cap_elem_fixed *eht_cap)
+ const struct ieee80211_eht_cap_elem_fixed *eht_cap,
+ bool from_ap)
{
u8 count = 0;
@@ -2907,7 +2921,10 @@ ieee80211_eht_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap,
if (eht_cap->phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ)
count += 3;
- return count ? count : 4;
+ if (count)
+ return count;
+
+ return from_ap ? 3 : 4;
}
/* 802.11be EHT PPE Thresholds */
@@ -2943,7 +2960,8 @@ ieee80211_eht_ppe_size(u16 ppe_thres_hdr, const u8 *phy_cap_info)
}
static inline bool
-ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len)
+ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len,
+ bool from_ap)
{
const struct ieee80211_eht_cap_elem_fixed *elem = (const void *)data;
u8 needed = sizeof(struct ieee80211_eht_cap_elem_fixed);
@@ -2952,7 +2970,8 @@ ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len)
return false;
needed += ieee80211_eht_mcs_nss_size((const void *)he_capa,
- (const void *)data);
+ (const void *)data,
+ from_ap);
if (len < needed)
return false;
@@ -4052,16 +4071,21 @@ struct ieee80211_he_6ghz_capa {
* @hdr: the frame
*
* The qos ctrl bytes come after the frame_control, duration, seq_num
- * and 3 or 4 addresses of length ETH_ALEN.
- * 3 addr: 2 + 2 + 2 + 3*6 = 24
- * 4 addr: 2 + 2 + 2 + 4*6 = 30
+ * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose
+ * between struct ieee80211_qos_hdr_4addr and struct ieee80211_qos_hdr.
*/
static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr)
{
- if (ieee80211_has_a4(hdr->frame_control))
- return (u8 *)hdr + 30;
+ union {
+ struct ieee80211_qos_hdr addr3;
+ struct ieee80211_qos_hdr_4addr addr4;
+ } *qos;
+
+ qos = (void *)hdr;
+ if (ieee80211_has_a4(qos->addr3.frame_control))
+ return (u8 *)&qos->addr4.qos_ctrl;
else
- return (u8 *)hdr + 24;
+ return (u8 *)&qos->addr3.qos_ctrl;
}
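
The helper now derives the QoS control offset from the two header layouts instead of the hard-coded 24/30 byte offsets, so it stays correct if the structs change. Typical use is unchanged; a small sketch of pulling the TID out of a received QoS data frame:

/* Sketch: extracting the TID from a QoS data frame header. */
static u8 get_frame_tid(struct ieee80211_hdr *hdr)
{
	u8 *qc;

	if (!ieee80211_is_data_qos(hdr->frame_control))
		return 0;

	qc = ieee80211_get_qos_ctl(hdr);
	return qc[0] & IEEE80211_QOS_CTL_TID_MASK;
}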
/**
@@ -4565,18 +4589,17 @@ static inline u8 ieee80211_mle_common_size(const u8 *data)
switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) {
case IEEE80211_ML_CONTROL_TYPE_BASIC:
- common += sizeof(struct ieee80211_mle_basic_common_info);
- break;
case IEEE80211_ML_CONTROL_TYPE_PREQ:
- common += sizeof(struct ieee80211_mle_preq_common_info);
+ case IEEE80211_ML_CONTROL_TYPE_TDLS:
+ /*
+ * The length is the first octet pointed by mle->variable so no
+ * need to add anything
+ */
break;
case IEEE80211_ML_CONTROL_TYPE_RECONF:
if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR)
common += ETH_ALEN;
return common;
- case IEEE80211_ML_CONTROL_TYPE_TDLS:
- common += sizeof(struct ieee80211_mle_tdls_common_info);
- break;
case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS:
if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR)
common += ETH_ALEN;
@@ -4586,7 +4609,7 @@ static inline u8 ieee80211_mle_common_size(const u8 *data)
return 0;
}
- return common + mle->variable[0];
+ return sizeof(*mle) + common + mle->variable[0];
}
/**
@@ -4594,7 +4617,7 @@ static inline u8 ieee80211_mle_common_size(const u8 *data)
* @data: pointer to the element data
* @len: length of the containing element
*/
-static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len)
+static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len)
{
const struct ieee80211_multi_link_elem *mle = (const void *)data;
u8 fixed = sizeof(*mle);
@@ -4659,6 +4682,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len)
enum ieee80211_mle_subelems {
IEEE80211_MLE_SUBELEM_PER_STA_PROFILE = 0,
+ IEEE80211_MLE_SUBELEM_FRAGMENT = 254,
};
#define IEEE80211_MLE_STA_CONTROL_LINK_ID 0x000f
@@ -4677,6 +4701,46 @@ struct ieee80211_mle_per_sta_profile {
u8 variable[];
} __packed;
+/**
+ * ieee80211_mle_sta_prof_size_ok - validate multi-link element sta profile size
+ * @data: pointer to the sub element data
+ * @len: length of the containing sub element
+ */
+static inline bool ieee80211_mle_sta_prof_size_ok(const u8 *data, size_t len)
+{
+ const struct ieee80211_mle_per_sta_profile *prof = (const void *)data;
+ u16 control;
+ u8 fixed = sizeof(*prof);
+ u8 info_len = 1;
+
+ if (len < fixed)
+ return false;
+
+ control = le16_to_cpu(prof->control);
+
+ if (control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT)
+ info_len += 6;
+ if (control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT)
+ info_len += 2;
+ if (control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT)
+ info_len += 8;
+ if (control & IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT)
+ info_len += 2;
+ if (control & IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT)
+ info_len += 1;
+
+ if (control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE &&
+ control & IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT) {
+ if (control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE)
+ info_len += 2;
+ else
+ info_len += 1;
+ }
+
+ return prof->sta_info_len >= info_len &&
+ fixed + prof->sta_info_len <= len;
+}
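Illustrative call pattern (assumed, not from the patch): a parser walks the multi-link sub-elements and only touches a per-STA profile after the new size check passes.

/* Hedged sketch: validate before dereferencing the per-STA profile. */
static void example_parse_sta_profile(const struct element *sub)
{
	const struct ieee80211_mle_per_sta_profile *prof;

	if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE ||
	    !ieee80211_mle_sta_prof_size_ok(sub->data, sub->datalen))
		return;

	prof = (const void *)sub->data;
	/* prof->control and the sta_info bytes are now known to be in bounds */
	pr_debug("link id %lu\n",
		 le16_get_bits(prof->control, IEEE80211_MLE_STA_CONTROL_LINK_ID));
}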
+
#define for_each_mle_subelement(_elem, _data, _len) \
if (ieee80211_mle_size_ok(_data, _len)) \
for_each_element(_elem, \
diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h
index f1f9412b6ac6..0303eb84d596 100644
--- a/include/linux/ieee802154.h
+++ b/include/linux/ieee802154.h
@@ -276,6 +276,30 @@ enum {
IEEE802154_SYSTEM_ERROR = 0xff,
};
+/**
+ * enum ieee802154_filtering_level - Filtering levels applicable to a PHY
+ *
+ * @IEEE802154_FILTERING_NONE: No filtering at all, what is received is
+ * forwarded to the softMAC
+ * @IEEE802154_FILTERING_1_FCS: First filtering level, frames with an invalid
+ * FCS should be dropped
+ * @IEEE802154_FILTERING_2_PROMISCUOUS: Second filtering level, promiscuous
+ * mode as described in the spec, identical in terms of filtering to the
+ * level one on PHY side, but at the MAC level the frame should be
+ * forwarded to the upper layer directly
+ * @IEEE802154_FILTERING_3_SCAN: Third filtering level, scan related, where
+ * only beacons must be processed, all remaining traffic gets dropped
+ * @IEEE802154_FILTERING_4_FRAME_FIELDS: Fourth filtering level actually
+ * enforcing the validity of the content of the frame with various checks
+ */
+enum ieee802154_filtering_level {
+ IEEE802154_FILTERING_NONE,
+ IEEE802154_FILTERING_1_FCS,
+ IEEE802154_FILTERING_2_PROMISCUOUS,
+ IEEE802154_FILTERING_3_SCAN,
+ IEEE802154_FILTERING_4_FRAME_FIELDS,
+};
+
/* frame control handling */
#define IEEE802154_FCTL_FTYPE 0x0003
#define IEEE802154_FCTL_ACKREQ 0x0020
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index d62ef428e3aa..1668ac4d7adc 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -59,6 +59,7 @@ struct br_ip_list {
#define BR_MRP_LOST_IN_CONT BIT(19)
#define BR_TX_FWD_OFFLOAD BIT(20)
#define BR_PORT_LOCKED BIT(21)
+#define BR_PORT_MAB BIT(22)
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
index 96d40942e5a3..c87efd333faa 100644
--- a/include/linux/if_pppol2tp.h
+++ b/include/linux/if_pppol2tp.h
@@ -4,8 +4,6 @@
*
* This file supplies definitions required by the PPP over L2TP driver
* (l2tp_ppp.c). All version information wrt this file is located in l2tp_ppp.c
- *
- * License:
*/
#ifndef __LINUX_IF_PPPOL2TP_H
#define __LINUX_IF_PPPOL2TP_H
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 69e813bcb947..ff3beda1312c 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -5,8 +5,6 @@
*
* This file supplies definitions required by the PPP over Ethernet driver
* (pppox.c). All version information wrt this file is located in pppox.c
- *
- * License:
*/
#ifndef __LINUX_IF_PPPOX_H
#define __LINUX_IF_PPPOX_H
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index e00c4ee81ff7..6864b89ef868 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -76,7 +76,7 @@ static inline bool is_vlan_dev(const struct net_device *dev)
return dev->priv_flags & IFF_802_1Q_VLAN;
}
-#define skb_vlan_tag_present(__skb) ((__skb)->vlan_present)
+#define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all)
#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci)
#define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK)
#define skb_vlan_tag_get_cfi(__skb) (!!((__skb)->vlan_tci & VLAN_CFI_MASK))
@@ -471,7 +471,7 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb,
*/
static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb)
{
- skb->vlan_present = 0;
+ skb->vlan_all = 0;
}
/**
@@ -483,9 +483,7 @@ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb)
*/
static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src)
{
- dst->vlan_present = src->vlan_present;
- dst->vlan_proto = src->vlan_proto;
- dst->vlan_tci = src->vlan_tci;
+ dst->vlan_all = src->vlan_all;
}
/*
@@ -519,7 +517,6 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb,
{
skb->vlan_proto = vlan_proto;
skb->vlan_tci = vlan_tci;
- skb->vlan_present = 1;
}
/**
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 93c262ecbdc9..78890143f079 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -118,9 +118,9 @@ extern int ip_mc_source(int add, int omode, struct sock *sk,
struct ip_mreq_source *mreqs, int ifindex);
extern int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf,int ifindex);
extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
- struct ip_msfilter __user *optval, int __user *optlen);
+ sockptr_t optval, sockptr_t optlen);
extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
- struct sockaddr_storage __user *p);
+ sockptr_t optval, size_t offset);
extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt,
int dif, int sdif);
extern void ip_mc_init_dev(struct in_device *);
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 5fa5957586cf..6802596b017c 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -13,7 +13,7 @@
struct iio_dev;
struct iio_chan_spec;
struct device;
-struct device_node;
+struct fwnode_handle;
/**
* struct iio_channel - everything needed for a consumer to use a channel
@@ -99,26 +99,20 @@ void iio_channel_release_all(struct iio_channel *chan);
struct iio_channel *devm_iio_channel_get_all(struct device *dev);
/**
- * of_iio_channel_get_by_name() - get description of all that is needed to access channel.
- * @np: Pointer to consumer device tree node
+ * fwnode_iio_channel_get_by_name() - get description of all that is needed to access channel.
+ * @fwnode: Pointer to consumer Firmware node
* @consumer_channel: Unique name to identify the channel on the consumer
* side. This typically describes the channels use within
* the consumer. E.g. 'battery_voltage'
*/
-#ifdef CONFIG_OF
-struct iio_channel *of_iio_channel_get_by_name(struct device_node *np, const char *name);
-#else
-static inline struct iio_channel *
-of_iio_channel_get_by_name(struct device_node *np, const char *name)
-{
- return NULL;
-}
-#endif
+struct iio_channel *fwnode_iio_channel_get_by_name(struct fwnode_handle *fwnode,
+ const char *name);
/**
- * devm_of_iio_channel_get_by_name() - Resource managed version of of_iio_channel_get_by_name().
+ * devm_fwnode_iio_channel_get_by_name() - Resource managed version of
+ * fwnode_iio_channel_get_by_name().
* @dev: Pointer to consumer device.
- * @np: Pointer to consumer device tree node
+ * @fwnode: Pointer to consumer Firmware node
* @consumer_channel: Unique name to identify the channel on the consumer
* side. This typically describes the channels use within
* the consumer. E.g. 'battery_voltage'
@@ -129,9 +123,9 @@ of_iio_channel_get_by_name(struct device_node *np, const char *name)
* The allocated iio channel is automatically released when the device is
* unbound.
*/
-struct iio_channel *devm_of_iio_channel_get_by_name(struct device *dev,
- struct device_node *np,
- const char *consumer_channel);
+struct iio_channel *devm_fwnode_iio_channel_get_by_name(struct device *dev,
+ struct fwnode_handle *fwnode,
+ const char *consumer_channel);
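A hypothetical consumer probe (names assumed) showing the fwnode-based replacement for the old OF getter; the channel name follows the 'battery_voltage' example in the kerneldoc above.

/* Hedged sketch: devm getter keyed off the consumer's own fwnode. */
static int example_probe(struct platform_device *pdev)
{
	struct iio_channel *chan;

	chan = devm_fwnode_iio_channel_get_by_name(&pdev->dev,
						   dev_fwnode(&pdev->dev),
						   "battery_voltage");
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	/* released automatically when the consumer device is unbound */
	return 0;
}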
struct iio_cb_buffer;
/**
diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h
index 6b3586b3f952..d1f8b30a7c8b 100644
--- a/include/linux/iio/iio-opaque.h
+++ b/include/linux/iio/iio-opaque.h
@@ -11,6 +11,7 @@
* checked by device drivers but should be considered
* read-only as this is a core internal bit
* @driver_module: used to make it harder to undercut users
+ * @mlock_key: lockdep class for iio_dev lock
* @info_exist_lock: lock to prevent use during removal
* @trig_readonly: mark the current trigger immutable
* @event_interface: event chrdevs associated with interrupt lines
@@ -42,6 +43,7 @@ struct iio_dev_opaque {
int currentmode;
int id;
struct module *driver_module;
+ struct lock_class_key mlock_key;
struct mutex info_exist_lock;
bool trig_readonly;
struct iio_event_interface *event_interface;
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 5dfbfc991c69..f0ec8a5e5a7a 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -17,7 +17,7 @@
* Currently assumes nano seconds.
*/
-struct of_phandle_args;
+struct fwnode_reference_args;
enum iio_shared_by {
IIO_SEPARATE,
@@ -429,6 +429,8 @@ struct iio_trigger; /* forward declaration */
* provide a custom of_xlate function that reads the
* *args* and returns the appropriate index in registered
* IIO channels array.
+ * @fwnode_xlate: fwnode based function pointer to obtain channel specifier index.
+ * Functionally the same as @of_xlate.
* @hwfifo_set_watermark: function pointer to set the current hardware
* fifo watermark level; see hwfifo_* entries in
* Documentation/ABI/testing/sysfs-bus-iio for details on
@@ -508,8 +510,8 @@ struct iio_info {
int (*debugfs_reg_access)(struct iio_dev *indio_dev,
unsigned reg, unsigned writeval,
unsigned *readval);
- int (*of_xlate)(struct iio_dev *indio_dev,
- const struct of_phandle_args *iiospec);
+ int (*fwnode_xlate)(struct iio_dev *indio_dev,
+ const struct fwnode_reference_args *iiospec);
int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val);
int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev,
unsigned count);
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index a7aa91f3a8dc..82faa98c719a 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -17,6 +17,8 @@ enum iio_event_info {
IIO_EV_INFO_HIGH_PASS_FILTER_3DB,
IIO_EV_INFO_LOW_PASS_FILTER_3DB,
IIO_EV_INFO_TIMEOUT,
+ IIO_EV_INFO_RESET_TIMEOUT,
+ IIO_EV_INFO_TAP2_MIN_DELAY,
};
#define IIO_VAL_INT 1
@@ -63,6 +65,7 @@ enum iio_chan_info_enum {
IIO_CHAN_INFO_OVERSAMPLING_RATIO,
IIO_CHAN_INFO_THERMOCOUPLE_TYPE,
IIO_CHAN_INFO_CALIBAMBIENT,
+ IIO_CHAN_INFO_ZEROPOINT,
};
#endif /* _IIO_TYPES_H_ */
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 81708ca0ebc7..5a0b2a285a18 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -187,6 +187,15 @@ extern void ima_inode_post_setattr(struct user_namespace *mnt_userns,
struct dentry *dentry);
extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
const void *xattr_value, size_t xattr_value_len);
+extern int ima_inode_set_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl);
+static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ return ima_inode_set_acl(mnt_userns, dentry, acl_name, NULL);
+}
extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name);
#else
static inline bool is_ima_appraise_enabled(void)
@@ -208,11 +217,26 @@ static inline int ima_inode_setxattr(struct dentry *dentry,
return 0;
}
+static inline int ima_inode_set_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl)
+{
+
+ return 0;
+}
+
static inline int ima_inode_removexattr(struct dentry *dentry,
const char *xattr_name)
{
return 0;
}
+
+static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ return 0;
+}
#endif /* CONFIG_IMA_APPRAISE */
#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
diff --git a/include/linux/init.h b/include/linux/init.h
index baf0b29a7010..c5fe6d26f5b1 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -2,7 +2,9 @@
#ifndef _LINUX_INIT_H
#define _LINUX_INIT_H
+#include <linux/build_bug.h>
#include <linux/compiler.h>
+#include <linux/stringify.h>
#include <linux/types.h>
/* Built-in __init functions needn't be compiled with retpoline */
@@ -47,7 +49,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline __nocfi
+#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline
#define __initdata __section(".init.data")
#define __initconst __section(".init.rodata")
#define __exitdata __section(".exit.data")
@@ -134,7 +136,7 @@ static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
-/* Used for contructor calls. */
+/* Used for constructor calls. */
typedef void (*ctor_fn_t)(void);
struct file_system_type;
@@ -143,6 +145,7 @@ struct file_system_type;
extern int do_one_initcall(initcall_t fn);
extern char __initdata boot_command_line[];
extern char *saved_command_line;
+extern unsigned int saved_command_line_len;
extern unsigned int reset_devices;
/* used by init/main.c */
@@ -220,8 +223,8 @@ extern bool initcall_debug;
__initcall_name(initstub, __iid, id)
#define __define_initcall_stub(__stub, fn) \
- int __init __cficanonical __stub(void); \
- int __init __cficanonical __stub(void) \
+ int __init __stub(void); \
+ int __init __stub(void) \
{ \
return fn(); \
} \
diff --git a/include/linux/input/auo-pixcir-ts.h b/include/linux/input/auo-pixcir-ts.h
deleted file mode 100644
index ed0776997a7a..000000000000
--- a/include/linux/input/auo-pixcir-ts.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Driver for AUO in-cell touchscreens
- *
- * Copyright (c) 2011 Heiko Stuebner <heiko@sntech.de>
- *
- * based on auo_touch.h from Dell Streak kernel
- *
- * Copyright (c) 2008 QUALCOMM Incorporated.
- * Copyright (c) 2008 QUALCOMM USA, INC.
- */
-
-#ifndef __AUO_PIXCIR_TS_H__
-#define __AUO_PIXCIR_TS_H__
-
-/*
- * Interrupt modes:
- * periodical: interrupt is asserted periodicaly
- * compare coordinates: interrupt is asserted when coordinates change
- * indicate touch: interrupt is asserted during touch
- */
-#define AUO_PIXCIR_INT_PERIODICAL 0x00
-#define AUO_PIXCIR_INT_COMP_COORD 0x01
-#define AUO_PIXCIR_INT_TOUCH_IND 0x02
-
-/*
- * @gpio_int interrupt gpio
- * @int_setting one of AUO_PIXCIR_INT_*
- * @init_hw hardwarespecific init
- * @exit_hw hardwarespecific shutdown
- * @x_max x-resolution
- * @y_max y-resolution
- */
-struct auo_pixcir_ts_platdata {
- int gpio_int;
- int gpio_rst;
-
- int int_setting;
-
- unsigned int x_max;
- unsigned int y_max;
-};
-
-#endif
diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h
index 42faebbaa202..501fa8486749 100644
--- a/include/linux/instrumented.h
+++ b/include/linux/instrumented.h
@@ -2,7 +2,7 @@
/*
* This header provides generic wrappers for memory access instrumentation that
- * the compiler cannot emit for: KASAN, KCSAN.
+ * the compiler cannot emit for: KASAN, KCSAN, KMSAN.
*/
#ifndef _LINUX_INSTRUMENTED_H
#define _LINUX_INSTRUMENTED_H
@@ -10,6 +10,7 @@
#include <linux/compiler.h>
#include <linux/kasan-checks.h>
#include <linux/kcsan-checks.h>
+#include <linux/kmsan-checks.h>
#include <linux/types.h>
/**
@@ -117,10 +118,11 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
{
kasan_check_read(from, n);
kcsan_check_read(from, n);
+ kmsan_copy_to_user(to, from, n, 0);
}
/**
- * instrument_copy_from_user - instrument writes of copy_from_user
+ * instrument_copy_from_user_before - add instrumentation before copy_from_user
*
* Instrument writes to kernel memory, that are due to copy_from_user (and
* variants). The instrumentation should be inserted before the accesses.
@@ -130,10 +132,61 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
* @n number of bytes to copy
*/
static __always_inline void
-instrument_copy_from_user(const void *to, const void __user *from, unsigned long n)
+instrument_copy_from_user_before(const void *to, const void __user *from, unsigned long n)
{
kasan_check_write(to, n);
kcsan_check_write(to, n);
}
+/**
+ * instrument_copy_from_user_after - add instrumentation after copy_from_user
+ *
+ * Instrument writes to kernel memory, that are due to copy_from_user (and
+ * variants). The instrumentation should be inserted after the accesses.
+ *
+ * @to destination address
+ * @from source address
+ * @n number of bytes to copy
+ * @left number of bytes not copied (as returned by copy_from_user)
+ */
+static __always_inline void
+instrument_copy_from_user_after(const void *to, const void __user *from,
+ unsigned long n, unsigned long left)
+{
+ kmsan_unpoison_memory(to, n - left);
+}
+
+/**
+ * instrument_get_user() - add instrumentation to get_user()-like macros
+ *
+ * get_user() and friends are fragile, so it may depend on the implementation
+ * whether the instrumentation happens before or after the data is copied from
+ * userspace.
+ *
+ * @to destination variable, may not be address-taken
+ */
+#define instrument_get_user(to) \
+({ \
+ u64 __tmp = (u64)(to); \
+ kmsan_unpoison_memory(&__tmp, sizeof(__tmp)); \
+ to = __tmp; \
+})
+
+
+/**
+ * instrument_put_user() - add instrumentation to put_user()-like macros
+ *
+ * put_user() and friends are fragile, so it may depend on the implementation
+ * whether the instrumentation happens before or after the data is copied to
+ * userspace.
+ *
+ * @from source address
+ * @ptr userspace pointer to copy to
+ * @size number of bytes to copy
+ */
+#define instrument_put_user(from, ptr, size) \
+({ \
+ kmsan_copy_to_user(ptr, &from, sizeof(from), 0); \
+})
+
#endif /* _LINUX_INSTRUMENTED_H */
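For illustration, a sketch (not the actual generic or arch implementation) of how a copy_from_user path is expected to bracket the raw copy: the KASAN/KCSAN checks run before the write, and KMSAN unpoisons only the bytes that were actually copied.

/* Hedged sketch of a caller; the real call sites are the uaccess implementations. */
static unsigned long example_copy_from_user(void *to, const void __user *from,
					    unsigned long n)
{
	unsigned long left;

	instrument_copy_from_user_before(to, from, n);
	left = raw_copy_from_user(to, from, n);	/* returns bytes NOT copied */
	instrument_copy_from_user_after(to, from, n, left);

	return left;
}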
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 207ef06ba3e1..f9a0d44f6fdb 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -13,17 +13,4 @@
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5)
-/*
- * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
- * for access to kernel addresses. No IOTLB flushes are automatically done
- * for kernel mappings; it is valid only for access to the kernel's static
- * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
- * A future API addition may permit the use of such ranges, by means of an
- * explicit IOTLB flush call (akin to the DMA API's unmap method).
- *
- * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
- * do such IOTLB flushes automatically.
- */
-#define SVM_FLAG_SUPERVISOR_MODE BIT(0)
-
#endif /* __INTEL_SVM_H__ */
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index 6bd01f7159c6..cd5c5a27557f 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -123,7 +123,7 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider);
void icc_node_del(struct icc_node *node);
int icc_nodes_remove(struct icc_provider *provider);
int icc_provider_add(struct icc_provider *provider);
-int icc_provider_del(struct icc_provider *provider);
+void icc_provider_del(struct icc_provider *provider);
struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec);
void icc_sync_state(struct device *dev);
@@ -172,9 +172,8 @@ static inline int icc_provider_add(struct icc_provider *provider)
return -ENOTSUPP;
}
-static inline int icc_provider_del(struct icc_provider *provider)
+static inline void icc_provider_del(struct icc_provider *provider)
{
- return -ENOTSUPP;
}
static inline struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec)
diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h
index 288c26f50732..2b8026a39906 100644
--- a/include/linux/interval_tree.h
+++ b/include/linux/interval_tree.h
@@ -27,4 +27,62 @@ extern struct interval_tree_node *
interval_tree_iter_next(struct interval_tree_node *node,
unsigned long start, unsigned long last);
+/**
+ * struct interval_tree_span_iter - Find used and unused spans.
+ * @start_hole: Start of an interval for a hole when is_hole == 1
+ * @last_hole: Inclusive end of an interval for a hole when is_hole == 1
+ * @start_used: Start of a used interval when is_hole == 0
+ * @last_used: Inclusive end of a used interval when is_hole == 0
+ * @is_hole: 0 == used, 1 == is_hole, -1 == done iteration
+ *
+ * This iterator travels over spans in an interval tree. It does not return
+ * nodes but classifies each span as either a hole, where no nodes intersect, or
+ * a used, which is fully covered by nodes. Each iteration step toggles between
+ * hole and used until the entire range is covered. The returned spans always
+ * fully cover the requested range.
+ *
+ * The iterator is greedy, it always returns the largest hole or used possible,
+ * consolidating all consecutive nodes.
+ *
+ * Use interval_tree_span_iter_done() to detect end of iteration.
+ */
+struct interval_tree_span_iter {
+ /* private: not for use by the caller */
+ struct interval_tree_node *nodes[2];
+ unsigned long first_index;
+ unsigned long last_index;
+
+ /* public: */
+ union {
+ unsigned long start_hole;
+ unsigned long start_used;
+ };
+ union {
+ unsigned long last_hole;
+ unsigned long last_used;
+ };
+ int is_hole;
+};
+
+void interval_tree_span_iter_first(struct interval_tree_span_iter *state,
+ struct rb_root_cached *itree,
+ unsigned long first_index,
+ unsigned long last_index);
+void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter,
+ struct rb_root_cached *itree,
+ unsigned long new_index);
+void interval_tree_span_iter_next(struct interval_tree_span_iter *state);
+
+static inline bool
+interval_tree_span_iter_done(struct interval_tree_span_iter *state)
+{
+ return state->is_hole == -1;
+}
+
+#define interval_tree_for_each_span(span, itree, first_index, last_index) \
+ for (interval_tree_span_iter_first(span, itree, \
+ first_index, last_index); \
+ !interval_tree_span_iter_done(span); \
+ interval_tree_span_iter_next(span))
+
#endif /* _LINUX_INTERVAL_TREE_H */
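A usage sketch under the stated semantics: the iterator alternates hole and used spans until [first, last] is covered, so summing the hole spans gives the free space in the range (the helper name is hypothetical).

/* Hedged sketch: count unused positions in [first, last] of an interval tree. */
static unsigned long example_count_free(struct rb_root_cached *itree,
					unsigned long first, unsigned long last)
{
	struct interval_tree_span_iter span;
	unsigned long free = 0;

	interval_tree_for_each_span(&span, itree, first, last)
		if (span.is_hole)
			free += span.last_hole - span.start_hole + 1;

	return free;
}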
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 66a774d2710e..09d4f17c8d3b 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -213,7 +213,7 @@ io_mapping_free(struct io_mapping *iomap)
kfree(iomap);
}
-#endif /* _LINUX_IO_MAPPING_H */
-
int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size);
+
+#endif /* _LINUX_IO_MAPPING_H */
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index ca98aeadcc80..1f068dfdb140 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -16,7 +16,9 @@ enum io_pgtable_fmt {
ARM_V7S,
ARM_MALI_LPAE,
AMD_IOMMU_V1,
+ AMD_IOMMU_V2,
APPLE_DART,
+ APPLE_DART2,
IO_PGTABLE_NUM_FMTS,
};
@@ -260,6 +262,7 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns;
extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
#endif /* __IO_PGTABLE_H */
diff --git a/include/linux/io.h b/include/linux/io.h
index 5fc800390fe4..308f4f0cfb93 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -59,8 +59,6 @@ void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset,
resource_size_t size);
void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
resource_size_t size);
-void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset,
- resource_size_t size);
void devm_iounmap(struct device *dev, void __iomem *addr);
int check_signature(const volatile void __iomem *io_addr,
const unsigned char *signature, int length);
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 4a2f6cc5a492..934e5dd4ccc0 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -4,30 +4,41 @@
#include <linux/sched.h>
#include <linux/xarray.h>
+#include <uapi/linux/io_uring.h>
enum io_uring_cmd_flags {
IO_URING_F_COMPLETE_DEFER = 1,
IO_URING_F_UNLOCKED = 2,
+ /* the request is executed from poll, it should not be freed */
+ IO_URING_F_MULTISHOT = 4,
+ /* executed by io-wq */
+ IO_URING_F_IOWQ = 8,
/* int's last bit, sign checks are usually faster than a bit test */
IO_URING_F_NONBLOCK = INT_MIN,
/* ctx state flags, for URING_CMD */
- IO_URING_F_SQE128 = 4,
- IO_URING_F_CQE32 = 8,
- IO_URING_F_IOPOLL = 16,
+ IO_URING_F_SQE128 = (1 << 8),
+ IO_URING_F_CQE32 = (1 << 9),
+ IO_URING_F_IOPOLL = (1 << 10),
};
struct io_uring_cmd {
struct file *file;
const void *cmd;
- /* callback to defer completions to task context */
- void (*task_work_cb)(struct io_uring_cmd *cmd);
+ union {
+ /* callback to defer completions to task context */
+ void (*task_work_cb)(struct io_uring_cmd *cmd);
+ /* used for polled completion */
+ void *cookie;
+ };
u32 cmd_op;
- u32 pad;
+ u32 flags;
u8 pdu[32]; /* available inline for free use */
};
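A hypothetical driver ->uring_cmd() handler, sketched against the updated flags and the new callback union: non-blocking issue is bounced back, and completion is deferred to task context via task_work_cb.

/* Hedged sketch: defer a passthrough command completion to task context. */
static void example_cmd_done(struct io_uring_cmd *ioucmd)
{
	/* runs in the submitter's task context */
	io_uring_cmd_done(ioucmd, 0, 0);
}

static int example_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	io_uring_cmd_complete_in_task(ioucmd, example_cmd_done);
	return -EIOCBQUEUED;
}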
#if defined(CONFIG_IO_URING)
+int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
+ struct iov_iter *iter, void *ioucmd);
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *));
@@ -55,6 +66,11 @@ static inline void io_uring_free(struct task_struct *tsk)
__io_uring_free(tsk);
}
#else
+static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
+ struct iov_iter *iter, void *ioucmd)
+{
+ return -EOPNOTSUPP;
+}
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
ssize_t ret2)
{
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 677a25d44d7f..dcd8a563ab52 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -34,9 +34,6 @@ struct io_file_table {
unsigned int alloc_hint;
};
-struct io_notif;
-struct io_notif_slot;
-
struct io_hash_bucket {
spinlock_t lock;
struct hlist_head list;
@@ -177,13 +174,17 @@ struct io_submit_state {
bool plug_started;
bool need_plug;
unsigned short submit_nr;
+ unsigned int cqes_count;
struct blk_plug plug;
+ struct io_uring_cqe cqes[16];
};
struct io_ev_fd {
struct eventfd_ctx *cq_ev_fd;
unsigned int eventfd_async: 1;
struct rcu_head rcu;
+ atomic_t refs;
+ atomic_t ops;
};
struct io_alloc_cache {
@@ -207,6 +208,8 @@ struct io_ring_ctx {
unsigned int drain_disabled: 1;
unsigned int has_evfd: 1;
unsigned int syscall_iopoll: 1;
+ /* all CQEs should be posted only by the submitter task */
+ unsigned int task_complete: 1;
} ____cacheline_aligned_in_smp;
/* submission data */
@@ -240,8 +243,6 @@ struct io_ring_ctx {
unsigned nr_user_files;
unsigned nr_user_bufs;
struct io_mapped_ubuf **user_bufs;
- struct io_notif_slot *notif_slots;
- unsigned nr_notif_slots;
struct io_submit_state submit_state;
@@ -301,6 +302,8 @@ struct io_ring_ctx {
struct io_hash_table cancel_table;
bool poll_multi_queue;
+ struct llist_head work_llist;
+
struct list_head io_buffers_comp;
} ____cacheline_aligned_in_smp;
@@ -325,6 +328,7 @@ struct io_ring_ctx {
struct io_rsrc_data *buf_data;
struct delayed_work rsrc_put_work;
+ struct callback_head rsrc_put_tw;
struct llist_head rsrc_put_llist;
struct list_head rsrc_ref_list;
spinlock_t rsrc_ref_lock;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 238a03087e17..0983dfc9a203 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -49,26 +49,35 @@ struct vm_fault;
*
* IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
* buffer heads for this mapping.
+ *
+ * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent
+ * rather than a file data extent.
*/
-#define IOMAP_F_NEW 0x01
-#define IOMAP_F_DIRTY 0x02
-#define IOMAP_F_SHARED 0x04
-#define IOMAP_F_MERGED 0x08
-#define IOMAP_F_BUFFER_HEAD 0x10
-#define IOMAP_F_ZONE_APPEND 0x20
+#define IOMAP_F_NEW (1U << 0)
+#define IOMAP_F_DIRTY (1U << 1)
+#define IOMAP_F_SHARED (1U << 2)
+#define IOMAP_F_MERGED (1U << 3)
+#define IOMAP_F_BUFFER_HEAD (1U << 4)
+#define IOMAP_F_ZONE_APPEND (1U << 5)
+#define IOMAP_F_XATTR (1U << 6)
/*
* Flags set by the core iomap code during operations:
*
* IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
* has changed as the result of this write operation.
+ *
+ * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file
+ * range it covers needs to be remapped by the high level before the operation
+ * can proceed.
*/
-#define IOMAP_F_SIZE_CHANGED 0x100
+#define IOMAP_F_SIZE_CHANGED (1U << 8)
+#define IOMAP_F_STALE (1U << 9)
/*
* Flags from 0x1000 up are for file system specific usage:
*/
-#define IOMAP_F_PRIVATE 0x1000
+#define IOMAP_F_PRIVATE (1U << 12)
/*
@@ -89,6 +98,7 @@ struct iomap {
void *inline_data;
void *private; /* filesystem private */
const struct iomap_page_ops *page_ops;
+ u64 validity_cookie; /* used with .iomap_valid() */
};
static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
@@ -128,6 +138,23 @@ struct iomap_page_ops {
int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
struct page *page);
+
+ /*
+ * Check that the cached iomap still maps correctly to the filesystem's
+ * internal extent map. FS internal extent maps can change while iomap
+ * is iterating a cached iomap, so this hook allows iomap to detect that
+ * the iomap needs to be refreshed during a long running write
+ * operation.
+ *
+ * The filesystem can store internal state (e.g. a sequence number) in
+ * iomap->validity_cookie when the iomap is first mapped to be able to
+ * detect changes between mapping time and whenever .iomap_valid() is
+ * called.
+ *
+ * This is called with the folio over the specified file position held
+ * locked by the iomap code.
+ */
+ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
};
/*
@@ -226,6 +253,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
+int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
+ struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
+ int (*punch)(struct inode *inode, loff_t pos, loff_t length));
+
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
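An illustrative filesystem hookup of the new validity_cookie / ->iomap_valid() pair; the per-inode sequence counter here is an assumption, real filesystems use their own extent-map generation numbers.

/* Hedged sketch: reject a cached iomap once the inode's extent map changed. */
struct example_inode {
	struct inode	vfs_inode;
	u32		map_seq;	/* bumped whenever the extent map changes */
};

static bool example_iomap_valid(struct inode *inode, const struct iomap *iomap)
{
	struct example_inode *ei = container_of(inode, struct example_inode,
						vfs_inode);

	return iomap->validity_cookie == READ_ONCE(ei->map_seq);
}

/*
 * The ->iomap_begin() method would snapshot the same counter into
 * iomap->validity_cookie and point iomap->page_ops at an iomap_page_ops
 * instance whose .iomap_valid is example_iomap_valid.
 */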
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ea30f00dc145..46e1347bfa22 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -64,6 +64,8 @@ struct iommu_domain_geometry {
#define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */
#define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */
+#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */
+
/*
* This are the possible domain-types
*
@@ -77,6 +79,8 @@ struct iommu_domain_geometry {
* certain optimizations for these domains
* IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB
* invalidation.
+ * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses
+ * represented by mm_struct's.
*/
#define IOMMU_DOMAIN_BLOCKED (0U)
#define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT)
@@ -86,15 +90,27 @@ struct iommu_domain_geometry {
#define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \
__IOMMU_DOMAIN_DMA_API | \
__IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA)
struct iommu_domain {
unsigned type;
const struct iommu_domain_ops *ops;
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
- iommu_fault_handler_t handler;
- void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+ enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
+ void *data);
+ void *fault_data;
+ union {
+ struct {
+ iommu_fault_handler_t handler;
+ void *handler_token;
+ };
+ struct { /* IOMMU_DOMAIN_SVA */
+ struct mm_struct *mm;
+ int users;
+ };
+ };
};
static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
@@ -108,6 +124,11 @@ enum iommu_cap {
IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */
IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for
DMA protection and we should too */
+ /*
+ * Per-device flag indicating if enforce_cache_coherency() will work on
+ * this device.
+ */
+ IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
};
/* These are the possible reserved region types */
@@ -212,22 +233,22 @@ struct iommu_iotlb_gather {
* @of_xlate: add OF master IDs to iommu grouping
* @is_attach_deferred: Check if domain attach should be deferred from iommu
* driver init to device driver init (default no)
- * @dev_has/enable/disable_feat: per device entries to check/enable/disable
+ * @dev_enable/disable_feat: per device entries to enable/disable
* iommu specific features.
- * @sva_bind: Bind process address space to device
- * @sva_unbind: Unbind process address space from device
- * @sva_get_pasid: Get PASID associated to a SVA handle
* @page_response: handle page request response
* @def_domain_type: device default domain type, return value:
* - IOMMU_DOMAIN_IDENTITY: must use an identity domain
* - IOMMU_DOMAIN_DMA: must use a dma domain
* - 0: use the default setting
* @default_domain_ops: the default ops for domains
+ * @remove_dev_pasid: Remove any translation configurations of a specific
+ * pasid, so that any DMA transactions with this pasid
+ * will be blocked by the hardware.
* @pgsize_bitmap: bitmap of all possible supported page sizes
* @owner: Driver module providing these ops
*/
struct iommu_ops {
- bool (*capable)(enum iommu_cap);
+ bool (*capable)(struct device *dev, enum iommu_cap);
/* Domain allocation and freeing by the iommu driver */
struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type);
@@ -247,16 +268,12 @@ struct iommu_ops {
int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
- struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
- void *drvdata);
- void (*sva_unbind)(struct iommu_sva *handle);
- u32 (*sva_get_pasid)(struct iommu_sva *handle);
-
int (*page_response)(struct device *dev,
struct iommu_fault_event *evt,
struct iommu_page_response *msg);
int (*def_domain_type)(struct device *dev);
+ void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid);
const struct iommu_domain_ops *default_domain_ops;
unsigned long pgsize_bitmap;
@@ -266,7 +283,20 @@ struct iommu_ops {
/**
* struct iommu_domain_ops - domain specific operations
* @attach_dev: attach an iommu domain to a device
+ * Return:
+ * * 0 - success
+ * * EINVAL - can indicate that device and domain are incompatible due to
+ * some previous configuration of the domain, in which case the
+ * driver shouldn't log an error, since it is legitimate for a
+ * caller to test reuse of existing domains. Otherwise, it may
+ * still represent some other fundamental problem
+ * * ENOMEM - out of memory
+ * * ENOSPC - non-ENOMEM type of resource allocation failures
+ * * EBUSY - device is attached to a domain and cannot be changed
+ * * ENODEV - device specific errors, not able to be attached
+ * * <others> - treated as ENODEV by the caller. Use is discouraged
* @detach_dev: detach an iommu domain from a device
+ * @set_dev_pasid: set an iommu domain to a pasid of device
* @map: map a physically contiguous memory region to an iommu domain
* @map_pages: map a physically contiguous set of pages of the same size to
* an iommu domain.
@@ -287,6 +317,8 @@ struct iommu_ops {
struct iommu_domain_ops {
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+ int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
+ ioasid_t pasid);
int (*map)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
@@ -322,12 +354,14 @@ struct iommu_domain_ops {
* @list: Used by the iommu-core to keep a list of registered iommus
* @ops: iommu-ops for talking to this iommu
* @dev: struct device for sysfs handling
+ * @max_pasids: number of supported PASIDs
*/
struct iommu_device {
struct list_head list;
const struct iommu_ops *ops;
struct fwnode_handle *fwnode;
struct device *dev;
+ u32 max_pasids;
};
/**
@@ -366,6 +400,7 @@ struct iommu_fault_param {
* @fwspec: IOMMU fwspec data
* @iommu_dev: IOMMU device this device is linked to
* @priv: IOMMU Driver private data
+ * @max_pasids: number of PASIDs this device can consume
*
* TODO: migrate other per device data pointers under iommu_dev_data, e.g.
* struct iommu_group *iommu_group;
@@ -377,6 +412,7 @@ struct dev_iommu {
struct iommu_fwspec *fwspec;
struct iommu_device *iommu_dev;
void *priv;
+ u32 max_pasids;
};
int iommu_device_register(struct iommu_device *iommu,
@@ -416,11 +452,9 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
return dev->iommu->iommu_dev->ops;
}
-extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops);
extern int bus_iommu_probe(struct bus_type *bus);
extern bool iommu_present(struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
-extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap);
extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
extern struct iommu_group *iommu_group_get_by_id(int id);
extern void iommu_domain_free(struct iommu_domain *domain);
@@ -457,7 +491,7 @@ extern void iommu_set_default_translated(bool cmd_line);
extern bool iommu_default_passthrough(void);
extern struct iommu_resv_region *
iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot,
- enum iommu_resv_type type);
+ enum iommu_resv_type type, gfp_t gfp);
extern int iommu_get_group_resv_regions(struct iommu_group *group,
struct list_head *head);
@@ -607,6 +641,10 @@ struct iommu_group *fsl_mc_device_group(struct device *dev);
* @flags: IOMMU_FWSPEC_* flags
* @num_ids: number of associated device IDs
* @ids: IDs which this device may present to the IOMMU
+ *
+ * Note that the IDs (and any other information, really) stored in this structure should be
+ * considered private to the IOMMU device driver and are not to be used directly by IOMMU
+ * consumers.
*/
struct iommu_fwspec {
const struct iommu_ops *ops;
@@ -624,6 +662,7 @@ struct iommu_fwspec {
*/
struct iommu_sva {
struct device *dev;
+ struct iommu_domain *domain;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
@@ -665,12 +704,6 @@ void iommu_release_device(struct device *dev);
int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
-struct iommu_sva *iommu_sva_bind_device(struct device *dev,
- struct mm_struct *mm,
- void *drvdata);
-void iommu_sva_unbind_device(struct iommu_sva *handle);
-u32 iommu_sva_get_pasid(struct iommu_sva *handle);
-
int iommu_device_use_default_domain(struct device *dev);
void iommu_device_unuse_default_domain(struct device *dev);
@@ -678,6 +711,18 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);
void iommu_group_release_dma_owner(struct iommu_group *group);
bool iommu_group_dma_owner_claimed(struct iommu_group *group);
+int iommu_device_claim_dma_owner(struct device *dev, void *owner);
+void iommu_device_release_dma_owner(struct device *dev);
+
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+ struct mm_struct *mm);
+int iommu_attach_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid);
+void iommu_detach_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid);
+struct iommu_domain *
+iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
+ unsigned int type);
#else /* CONFIG_IOMMU_API */
struct iommu_ops {};
@@ -697,11 +742,6 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
return false;
}
-static inline bool iommu_capable(struct bus_type *bus, enum iommu_cap cap)
-{
- return false;
-}
-
static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
{
return NULL;
@@ -1002,21 +1042,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
return -ENODEV;
}
-static inline struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
-{
- return NULL;
-}
-
-static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
-{
-}
-
-static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
-{
- return IOMMU_PASID_INVALID;
-}
-
static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
{
return NULL;
@@ -1045,6 +1070,39 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
{
return false;
}
+
+static inline void iommu_device_release_dma_owner(struct device *dev)
+{
+}
+
+static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
+{
+ return -ENODEV;
+}
+
+static inline struct iommu_domain *
+iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+ return -ENODEV;
+}
+
+static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+}
+
+static inline struct iommu_domain *
+iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
+ unsigned int type)
+{
+ return NULL;
+}
#endif /* CONFIG_IOMMU_API */
/**
@@ -1070,4 +1128,83 @@ void iommu_debugfs_setup(void);
static inline void iommu_debugfs_setup(void) {}
#endif
+#ifdef CONFIG_IOMMU_DMA
+#include <linux/msi.h>
+
+/* Setup call for arch DMA mapping code */
+void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
+
+int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
+
+int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
+void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg);
+
+#else /* CONFIG_IOMMU_DMA */
+
+struct msi_desc;
+struct msi_msg;
+
+static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
+{
+}
+
+static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
+{
+ return -ENODEV;
+}
+
+static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return 0;
+}
+
+static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+}
+
+#endif /* CONFIG_IOMMU_DMA */
+
+/*
+ * Newer generations of Tegra SoCs require devices' stream IDs to be directly programmed into
+ * some registers. These are always paired with a Tegra SMMU or ARM SMMU, for which the contents
+ * of the struct iommu_fwspec are known. Use this helper to formalize access to these internals.
+ */
+#define TEGRA_STREAM_ID_BYPASS 0x7f
+
+static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream_id)
+{
+#ifdef CONFIG_IOMMU_API
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ if (fwspec && fwspec->num_ids == 1) {
+ *stream_id = fwspec->ids[0] & 0xffff;
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+#ifdef CONFIG_IOMMU_SVA
+struct iommu_sva *iommu_sva_bind_device(struct device *dev,
+ struct mm_struct *mm);
+void iommu_sva_unbind_device(struct iommu_sva *handle);
+u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+#else
+static inline struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+}
+
+static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+ return IOMMU_PASID_INVALID;
+}
+#endif /* CONFIG_IOMMU_SVA */
+
#endif /* __LINUX_IOMMU_H */
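A sketch of the relocated SVA API from a device driver's point of view (the function name and error handling are assumptions): bind the current mm, fetch the PASID to program into the hardware, and keep the handle until teardown.

/* Hedged sketch: enable SVA for the current process on one device. */
static int example_enable_sva(struct device *dev, u32 *pasid)
{
	struct iommu_sva *handle;

	handle = iommu_sva_bind_device(dev, current->mm);
	if (IS_ERR_OR_NULL(handle))
		return handle ? PTR_ERR(handle) : -ENODEV;

	*pasid = iommu_sva_get_pasid(handle);
	if (*pasid == IOMMU_PASID_INVALID) {
		iommu_sva_unbind_device(handle);
		return -ENODEV;
	}

	/* program *pasid into the device; unbind the handle on teardown */
	return 0;
}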
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
new file mode 100644
index 000000000000..650d45629647
--- /dev/null
+++ b/include/linux/iommufd.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#ifndef __LINUX_IOMMUFD_H
+#define __LINUX_IOMMUFD_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+
+struct device;
+struct iommufd_device;
+struct page;
+struct iommufd_ctx;
+struct iommufd_access;
+struct file;
+
+struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
+ struct device *dev, u32 *id);
+void iommufd_device_unbind(struct iommufd_device *idev);
+
+int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id);
+void iommufd_device_detach(struct iommufd_device *idev);
+
+struct iommufd_access_ops {
+ u8 needs_pin_pages : 1;
+ void (*unmap)(void *data, unsigned long iova, unsigned long length);
+};
+
+enum {
+ IOMMUFD_ACCESS_RW_READ = 0,
+ IOMMUFD_ACCESS_RW_WRITE = 1 << 0,
+ /* Set if the caller is in a kthread then rw will use kthread_use_mm() */
+ IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1,
+
+ /* Only for use by selftest */
+ __IOMMUFD_ACCESS_RW_SLOW_PATH = 1 << 2,
+};
+
+struct iommufd_access *
+iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+ const struct iommufd_access_ops *ops, void *data);
+void iommufd_access_destroy(struct iommufd_access *access);
+
+void iommufd_ctx_get(struct iommufd_ctx *ictx);
+
+#if IS_ENABLED(CONFIG_IOMMUFD)
+struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
+void iommufd_ctx_put(struct iommufd_ctx *ictx);
+
+int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
+ unsigned long length, struct page **out_pages,
+ unsigned int flags);
+void iommufd_access_unpin_pages(struct iommufd_access *access,
+ unsigned long iova, unsigned long length);
+int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+ void *data, size_t len, unsigned int flags);
+int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
+#else /* !CONFIG_IOMMUFD */
+static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void iommufd_ctx_put(struct iommufd_ctx *ictx)
+{
+}
+
+static inline int iommufd_access_pin_pages(struct iommufd_access *access,
+ unsigned long iova,
+ unsigned long length,
+ struct page **out_pages,
+ unsigned int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void iommufd_access_unpin_pages(struct iommufd_access *access,
+ unsigned long iova,
+ unsigned long length)
+{
+}
+
+static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+ void *data, size_t len, unsigned int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx,
+ u32 *out_ioas_id)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_IOMMUFD */
+#endif
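A sketch of how an emulated device driver might consume this interface (all names and the 64-byte read are illustrative): create an access against an IOAS, pin a page of IOVA space, read through it, then tear everything down.

/* Hedged sketch of an iommufd access user; not a real driver. */
static void example_unmap(void *data, unsigned long iova, unsigned long length)
{
	/* stop using any mappings that overlap [iova, iova + length) */
}

static const struct iommufd_access_ops example_access_ops = {
	.needs_pin_pages = 1,
	.unmap = example_unmap,
};

static int example_use_iova(struct iommufd_ctx *ictx, u32 ioas_id,
			    unsigned long iova, void *buf)
{
	struct iommufd_access *access;
	struct page *page;
	int rc;

	access = iommufd_access_create(ictx, ioas_id, &example_access_ops, NULL);
	if (IS_ERR(access))
		return PTR_ERR(access);

	rc = iommufd_access_pin_pages(access, iova, PAGE_SIZE, &page, 0);
	if (!rc) {
		rc = iommufd_access_rw(access, iova, buf, 64,
				       IOMMUFD_ACCESS_RW_READ);
		iommufd_access_unpin_pages(access, iova, PAGE_SIZE);
	}

	iommufd_access_destroy(access);
	return rc;
}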
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 616b683563a9..4ae3c541ea6f 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -79,7 +79,8 @@ struct resource {
#define IORESOURCE_IRQ_HIGHLEVEL (1<<2)
#define IORESOURCE_IRQ_LOWLEVEL (1<<3)
#define IORESOURCE_IRQ_SHAREABLE (1<<4)
-#define IORESOURCE_IRQ_OPTIONAL (1<<5)
+#define IORESOURCE_IRQ_OPTIONAL (1<<5)
+#define IORESOURCE_IRQ_WAKECAPABLE (1<<6)
/* PnP DMA specific bits (IORESOURCE_BITS) */
#define IORESOURCE_DMA_TYPE_MASK (3<<0)
@@ -172,6 +173,11 @@ enum {
#define DEFINE_RES_MEM(_start, _size) \
DEFINE_RES_MEM_NAMED((_start), (_size), NULL)
+#define DEFINE_RES_REG_NAMED(_start, _size, _name) \
+ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_REG)
+#define DEFINE_RES_REG(_start, _size) \
+ DEFINE_RES_REG_NAMED((_start), (_size), NULL)
+
#define DEFINE_RES_IRQ_NAMED(_irq, _name) \
DEFINE_RES_NAMED((_irq), 1, (_name), IORESOURCE_IRQ)
#define DEFINE_RES_IRQ(_irq) \
@@ -312,6 +318,8 @@ extern void __devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n);
extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
extern bool iomem_is_exclusive(u64 addr);
+extern bool resource_is_exclusive(struct resource *resource, u64 addr,
+ resource_size_t size);
extern int
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h
index a533cae189d7..cb71aa616bd3 100644
--- a/include/linux/iosys-map.h
+++ b/include/linux/iosys-map.h
@@ -46,10 +46,13 @@
*
* iosys_map_set_vaddr(&map, 0xdeadbeaf);
*
- * To set an address in I/O memory, use iosys_map_set_vaddr_iomem().
+ * To set an address in I/O memory, use IOSYS_MAP_INIT_VADDR_IOMEM() or
+ * iosys_map_set_vaddr_iomem().
*
* .. code-block:: c
*
+ * struct iosys_map map = IOSYS_MAP_INIT_VADDR_IOMEM(0xdeadbeaf);
+ *
* iosys_map_set_vaddr_iomem(&map, 0xdeadbeaf);
*
* Instances of struct iosys_map do not have to be cleaned up, but
@@ -122,6 +125,16 @@ struct iosys_map {
}
/**
+ * IOSYS_MAP_INIT_VADDR_IOMEM - Initializes struct iosys_map to an address in I/O memory
+ * @vaddr_iomem_: An I/O-memory address
+ */
+#define IOSYS_MAP_INIT_VADDR_IOMEM(vaddr_iomem_) \
+ { \
+ .vaddr_iomem = (vaddr_iomem_), \
+ .is_iomem = true, \
+ }
+
+/**
* IOSYS_MAP_INIT_OFFSET - Initializes struct iosys_map from another iosys_map
* @map_: The dma-buf mapping structure to copy from
* @offset_: Offset to add to the other mapping
diff --git a/include/linux/iova.h b/include/linux/iova.h
index c6ba6d95d79c..83c00fac2acb 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -75,7 +75,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova)
return iova >> iova_shift(iovad);
}
-#if IS_ENABLED(CONFIG_IOMMU_IOVA)
+#if IS_REACHABLE(CONFIG_IOMMU_IOVA)
int iova_cache_get(void);
void iova_cache_put(void);
diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h
new file mode 100644
index 000000000000..c006cf0a25f3
--- /dev/null
+++ b/include/linux/iova_bitmap.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates.
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+#ifndef _IOVA_BITMAP_H_
+#define _IOVA_BITMAP_H_
+
+#include <linux/types.h>
+
+struct iova_bitmap;
+
+typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap,
+ unsigned long iova, size_t length,
+ void *opaque);
+
+struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
+ unsigned long page_size,
+ u64 __user *data);
+void iova_bitmap_free(struct iova_bitmap *bitmap);
+int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
+ iova_bitmap_fn_t fn);
+void iova_bitmap_set(struct iova_bitmap *bitmap,
+ unsigned long iova, size_t length);
+
+#endif
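A sketch of the intended dirty-tracking flow (the callback body and helper names are assumptions): allocate a bitmap over the user buffer, let the core iterate the mapped ranges, and mark dirty IOVAs with iova_bitmap_set().

/* Hedged sketch: report every covered IOVA as dirty. */
static int example_report_dirty(struct iova_bitmap *bitmap, unsigned long iova,
				size_t length, void *opaque)
{
	/* a real driver would query device state via @opaque here */
	iova_bitmap_set(bitmap, iova, length);
	return 0;
}

static int example_sync_dirty(unsigned long iova, size_t length,
			      unsigned long page_size, u64 __user *data)
{
	struct iova_bitmap *bitmap;
	int rc;

	bitmap = iova_bitmap_alloc(iova, length, page_size, data);
	if (IS_ERR(bitmap))
		return PTR_ERR(bitmap);

	rc = iova_bitmap_for_each(bitmap, NULL, example_report_dirty);
	iova_bitmap_free(bitmap);
	return rc;
}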
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
struct user_namespace;
@@ -36,8 +37,8 @@ struct ipc_namespace {
unsigned int msg_ctlmax;
unsigned int msg_ctlmnb;
unsigned int msg_ctlmni;
- atomic_t msg_bytes;
- atomic_t msg_hdrs;
+ struct percpu_counter percpu_msg_bytes;
+ struct percpu_counter percpu_msg_hdrs;
size_t shm_ctlmax;
size_t shm_ctlall;
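For context, a sketch (helper names assumed) of what the per-CPU counters buy: message accounting becomes a cheap local add, while the occasional statistics read pays the cost of summing across CPUs.

/* Hedged sketch: update and read the converted msg accounting counters. */
static void example_account_msg(struct ipc_namespace *ns, size_t msgsz)
{
	percpu_counter_add(&ns->percpu_msg_bytes, msgsz);
	percpu_counter_add(&ns->percpu_msg_hdrs, 1);
}

static s64 example_queued_bytes(struct ipc_namespace *ns)
{
	return percpu_counter_sum(&ns->percpu_msg_bytes);
}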
diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h
index 3a091d0710ae..d5e6024cb2a8 100644
--- a/include/linux/irqchip.h
+++ b/include/linux/irqchip.h
@@ -44,7 +44,8 @@ static const struct of_device_id drv_name##_irqchip_match_table[] = {
#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, \
.data = typecheck_irq_init_cb(fn), },
-#define IRQCHIP_PLATFORM_DRIVER_END(drv_name) \
+
+#define IRQCHIP_PLATFORM_DRIVER_END(drv_name, ...) \
{}, \
}; \
MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table); \
@@ -56,6 +57,7 @@ static struct platform_driver drv_name##_driver = { \
.owner = THIS_MODULE, \
.of_match_table = drv_name##_irqchip_match_table, \
.suppress_bind_attrs = true, \
+ __VA_ARGS__ \
}, \
}; \
builtin_platform_driver(drv_name##_driver)
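The trailing varargs land inside the generated struct device_driver, so a modular irqchip can now attach extra members such as PM ops; the driver below is hypothetical but mirrors the intended pattern.

/* Hedged sketch: hypothetical irqchip module using the new varargs. */
static int example_intc_init(struct device_node *node, struct device_node *parent)
{
	return 0;	/* map registers and create the irq domain here */
}

static int __maybe_unused example_intc_suspend(struct device *dev) { return 0; }
static int __maybe_unused example_intc_resume(struct device *dev) { return 0; }

static const struct dev_pm_ops example_intc_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(example_intc_suspend, example_intc_resume)
};

IRQCHIP_PLATFORM_DRIVER_BEGIN(example_intc)
IRQCHIP_MATCH("vendor,example-intc", example_intc_init)
IRQCHIP_PLATFORM_DRIVER_END(example_intc, .pm = &example_intc_pm_ops)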
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1cd4e36890fb..844a8e30e6de 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -169,6 +169,7 @@ int generic_handle_irq_safe(unsigned int irq);
* conversion failed.
*/
int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
+int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq);
int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq);
#endif
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 00d577f90883..a372086750ca 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -31,6 +31,7 @@
#define _LINUX_IRQDOMAIN_H
#include <linux/types.h>
+#include <linux/irqdomain_defs.h>
#include <linux/irqhandler.h>
#include <linux/of.h>
#include <linux/mutex.h>
@@ -45,6 +46,7 @@ struct irq_desc;
struct cpumask;
struct seq_file;
struct irq_affinity_desc;
+struct msi_parent_ops;
#define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16
@@ -68,27 +70,6 @@ struct irq_fwspec {
void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
unsigned int count, struct irq_fwspec *fwspec);
-/*
- * Should several domains have the same device node, but serve
- * different purposes (for example one domain is for PCI/MSI, and the
- * other for wired IRQs), they can be distinguished using a
- * bus-specific token. Most domains are expected to only carry
- * DOMAIN_BUS_ANY.
- */
-enum irq_domain_bus_token {
- DOMAIN_BUS_ANY = 0,
- DOMAIN_BUS_WIRED,
- DOMAIN_BUS_GENERIC_MSI,
- DOMAIN_BUS_PCI_MSI,
- DOMAIN_BUS_PLATFORM_MSI,
- DOMAIN_BUS_NEXUS,
- DOMAIN_BUS_IPI,
- DOMAIN_BUS_FSL_MC_MSI,
- DOMAIN_BUS_TI_SCI_INTA_MSI,
- DOMAIN_BUS_WAKEUP,
- DOMAIN_BUS_VMD_MSI,
-};
-
/**
* struct irq_domain_ops - Methods for irq_domain objects
* @match: Match an interrupt controller device node to a host, returns
@@ -137,53 +118,61 @@ struct irq_domain_chip_generic;
/**
* struct irq_domain - Hardware interrupt number translation object
- * @link: Element in global irq_domain list.
- * @name: Name of interrupt domain
- * @ops: pointer to irq_domain methods
- * @host_data: private data pointer for use by owner. Not touched by irq_domain
- * core code.
- * @flags: host per irq_domain flags
- * @mapcount: The number of mapped interrupts
+ * @link: Element in global irq_domain list.
+ * @name: Name of interrupt domain
+ * @ops: Pointer to irq_domain methods
+ * @host_data: Private data pointer for use by owner. Not touched by irq_domain
+ * core code.
+ * @flags: Per irq_domain flags
+ * @mapcount: The number of mapped interrupts
*
- * Optional elements
- * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy
- * to swap it for the of_node via the irq_domain_get_of_node accessor
- * @gc: Pointer to a list of generic chips. There is a helper function for
- * setting up one or more generic chips for interrupt controllers
- * drivers using the generic chip library which uses this pointer.
- * @dev: Pointer to a device that the domain represent, and that will be
- * used for power management purposes.
- * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
+ * Optional elements:
+ * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy
+ * to swap it for the of_node via the irq_domain_get_of_node accessor
+ * @gc: Pointer to a list of generic chips. There is a helper function for
+ * setting up one or more generic chips for interrupt controllers
+ * drivers using the generic chip library which uses this pointer.
+ * @dev: Pointer to the device which instantiated the irqdomain
+ * With per device irq domains this is not necessarily the same
+ * as @pm_dev.
+ * @pm_dev: Pointer to a device that can be utilized for power management
+ * purposes related to the irq domain.
+ * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
+ * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init
*
- * Revmap data, used internally by irq_domain
- * @revmap_size: Size of the linear map table @revmap[]
- * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
- * @revmap_mutex: Lock for the revmap
- * @revmap: Linear table of irq_data pointers
+ * Revmap data, used internally by the irq domain code:
+ * @revmap_size: Size of the linear map table @revmap[]
+ * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
+ * @revmap_mutex: Lock for the revmap
+ * @revmap: Linear table of irq_data pointers
*/
struct irq_domain {
- struct list_head link;
- const char *name;
- const struct irq_domain_ops *ops;
- void *host_data;
- unsigned int flags;
- unsigned int mapcount;
+ struct list_head link;
+ const char *name;
+ const struct irq_domain_ops *ops;
+ void *host_data;
+ unsigned int flags;
+ unsigned int mapcount;
/* Optional data */
- struct fwnode_handle *fwnode;
- enum irq_domain_bus_token bus_token;
- struct irq_domain_chip_generic *gc;
- struct device *dev;
+ struct fwnode_handle *fwnode;
+ enum irq_domain_bus_token bus_token;
+ struct irq_domain_chip_generic *gc;
+ struct device *dev;
+ struct device *pm_dev;
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
- struct irq_domain *parent;
+ struct irq_domain *parent;
+#endif
+#ifdef CONFIG_GENERIC_MSI_IRQ
+ const struct msi_parent_ops *msi_parent_ops;
#endif
/* reverse map data. The linear map gets appended to the irq_domain */
- irq_hw_number_t hwirq_max;
- unsigned int revmap_size;
- struct radix_tree_root revmap_tree;
- struct mutex revmap_mutex;
- struct irq_data __rcu *revmap[];
+ irq_hw_number_t hwirq_max;
+ unsigned int revmap_size;
+ struct radix_tree_root revmap_tree;
+ struct mutex revmap_mutex;
+ struct irq_data __rcu *revmap[];
};
/* Irq domain flags */
@@ -206,15 +195,14 @@ enum {
/* Irq domain implements MSI remapping */
IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5),
- /*
- * Quirk to handle MSI implementations which do not provide
- * masking. Currently known to affect x86, but partially
- * handled in core code.
- */
- IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6),
-
/* Irq domain doesn't translate anything */
- IRQ_DOMAIN_FLAG_NO_MAP = (1 << 7),
+ IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6),
+
+ /* Irq domain is a MSI parent domain */
+ IRQ_DOMAIN_FLAG_MSI_PARENT = (1 << 8),
+
+ /* Irq domain is a MSI device domain */
+ IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9),
/*
* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
@@ -233,7 +221,7 @@ static inline void irq_domain_set_pm_device(struct irq_domain *d,
struct device *dev)
{
if (d)
- d->dev = dev;
+ d->pm_dev = dev;
}
#ifdef CONFIG_IRQ_DOMAIN
@@ -578,6 +566,16 @@ static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain);
+static inline bool irq_domain_is_msi_parent(struct irq_domain *domain)
+{
+ return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT;
+}
+
+static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
+{
+ return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE;
+}
+
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
unsigned int nr_irqs, int node, void *arg)
@@ -623,6 +621,17 @@ irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
{
return false;
}
+
+static inline bool irq_domain_is_msi_parent(struct irq_domain *domain)
+{
+ return false;
+}
+
+static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
+{
+ return false;
+}
+
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#else /* CONFIG_IRQ_DOMAIN */
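A minimal sketch (not part of this patch) of how an irqchip driver might use the new @pm_dev field and the MSI-parent test; irq_domain_create_linear() is the existing creation helper, and all my_* names are hypothetical:

#include <linux/irqdomain.h>
#include <linux/device.h>

static const struct irq_domain_ops my_domain_ops = {
	/* .map / .alloc / .free callbacks omitted in this sketch */
};

static int my_irqchip_setup(struct device *dev, struct fwnode_handle *fwnode)
{
	struct irq_domain *d;

	d = irq_domain_create_linear(fwnode, 32, &my_domain_ops, NULL);
	if (!d)
		return -ENOMEM;

	/* Power management now goes through @pm_dev rather than @dev. */
	irq_domain_set_pm_device(d, dev);

	/* Core MSI code can test whether this is a per-device MSI parent. */
	if (irq_domain_is_msi_parent(d))
		pr_debug("%s: domain is an MSI parent\n", dev_name(dev));

	return 0;
}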
diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h
new file mode 100644
index 000000000000..c29921fd8cd1
--- /dev/null
+++ b/include/linux/irqdomain_defs.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IRQDOMAIN_DEFS_H
+#define _LINUX_IRQDOMAIN_DEFS_H
+
+/*
+ * Should several domains have the same device node, but serve
+ * different purposes (for example one domain is for PCI/MSI, and the
+ * other for wired IRQs), they can be distinguished using a
+ * bus-specific token. Most domains are expected to only carry
+ * DOMAIN_BUS_ANY.
+ */
+enum irq_domain_bus_token {
+ DOMAIN_BUS_ANY = 0,
+ DOMAIN_BUS_WIRED,
+ DOMAIN_BUS_GENERIC_MSI,
+ DOMAIN_BUS_PCI_MSI,
+ DOMAIN_BUS_PLATFORM_MSI,
+ DOMAIN_BUS_NEXUS,
+ DOMAIN_BUS_IPI,
+ DOMAIN_BUS_FSL_MC_MSI,
+ DOMAIN_BUS_TI_SCI_INTA_MSI,
+ DOMAIN_BUS_WAKEUP,
+ DOMAIN_BUS_VMD_MSI,
+ DOMAIN_BUS_PCI_DEVICE_MSI,
+ DOMAIN_BUS_PCI_DEVICE_MSIX,
+ DOMAIN_BUS_DMAR,
+ DOMAIN_BUS_AMDVI,
+ DOMAIN_BUS_PCI_DEVICE_IMS,
+};
+
+#endif /* _LINUX_IRQDOMAIN_DEFS_H */
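A hedged sketch of how the bus token is typically consumed when looking up a domain; irq_find_matching_fwnode() is the existing lookup helper, and the fwnode argument is assumed to come from the device's firmware description:

#include <linux/irqdomain.h>

static struct irq_domain *my_find_pci_msi_domain(struct fwnode_handle *fwnode)
{
	/* Several domains may share one fwnode; the token picks the right one. */
	return irq_find_matching_fwnode(fwnode, DOMAIN_BUS_PCI_MSI);
}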
diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h
index bd4c066ad39b..d426c7ad92bf 100644
--- a/include/linux/irqreturn.h
+++ b/include/linux/irqreturn.h
@@ -3,10 +3,10 @@
#define _LINUX_IRQRETURN_H
/**
- * enum irqreturn
- * @IRQ_NONE interrupt was not from this device or was not handled
- * @IRQ_HANDLED interrupt was handled by this device
- * @IRQ_WAKE_THREAD handler requests to wake the handler thread
+ * enum irqreturn - irqreturn type values
+ * @IRQ_NONE: interrupt was not from this device or was not handled
+ * @IRQ_HANDLED: interrupt was handled by this device
+ * @IRQ_WAKE_THREAD: handler requests to wake the handler thread
*/
enum irqreturn {
IRQ_NONE = (0 << 0),
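A short, hedged handler sketch illustrating the documented return values; the MY_* register offsets are hypothetical:

#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/bits.h>

#define MY_IRQ_STATUS	0x00	/* hypothetical register offsets */
#define MY_IRQ_ACK	0x04
#define MY_IRQ_PENDING	BIT(0)

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	void __iomem *regs = dev_id;

	if (!(readl(regs + MY_IRQ_STATUS) & MY_IRQ_PENDING))
		return IRQ_NONE;			/* not our interrupt */

	writel(MY_IRQ_PENDING, regs + MY_IRQ_ACK);	/* acknowledge */
	return IRQ_HANDLED;				/* fully handled here */
}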
diff --git a/include/linux/isa.h b/include/linux/isa.h
index e30963190968..4fbbf5e36e08 100644
--- a/include/linux/isa.h
+++ b/include/linux/isa.h
@@ -38,6 +38,32 @@ static inline void isa_unregister_driver(struct isa_driver *d)
}
#endif
+#define module_isa_driver_init(__isa_driver, __num_isa_dev) \
+static int __init __isa_driver##_init(void) \
+{ \
+ return isa_register_driver(&(__isa_driver), __num_isa_dev); \
+} \
+module_init(__isa_driver##_init)
+
+#define module_isa_driver_with_irq_init(__isa_driver, __num_isa_dev, __num_irq) \
+static int __init __isa_driver##_init(void) \
+{ \
+ if (__num_irq != __num_isa_dev) { \
+ pr_err("%s: Number of irq (%u) does not match number of base (%u)\n", \
+ __isa_driver.driver.name, __num_irq, __num_isa_dev); \
+ return -EINVAL; \
+ } \
+ return isa_register_driver(&(__isa_driver), __num_isa_dev); \
+} \
+module_init(__isa_driver##_init)
+
+#define module_isa_driver_exit(__isa_driver) \
+static void __exit __isa_driver##_exit(void) \
+{ \
+ isa_unregister_driver(&(__isa_driver)); \
+} \
+module_exit(__isa_driver##_exit)
+
/**
* module_isa_driver() - Helper macro for registering a ISA driver
* @__isa_driver: isa_driver struct
@@ -48,16 +74,22 @@ static inline void isa_unregister_driver(struct isa_driver *d)
* use this macro once, and calling it replaces module_init and module_exit.
*/
#define module_isa_driver(__isa_driver, __num_isa_dev) \
-static int __init __isa_driver##_init(void) \
-{ \
- return isa_register_driver(&(__isa_driver), __num_isa_dev); \
-} \
-module_init(__isa_driver##_init); \
-static void __exit __isa_driver##_exit(void) \
-{ \
- isa_unregister_driver(&(__isa_driver)); \
-} \
-module_exit(__isa_driver##_exit);
+module_isa_driver_init(__isa_driver, __num_isa_dev); \
+module_isa_driver_exit(__isa_driver)
+
+/**
+ * module_isa_driver_with_irq() - Helper macro for registering an ISA driver with irq
+ * @__isa_driver: isa_driver struct
+ * @__num_isa_dev: number of devices to register
+ * @__num_irq: number of IRQ to register
+ *
+ * Helper macro for ISA drivers with irq that do not do anything special in
+ * module init/exit. Each module may only use this macro once, and calling it
+ * replaces module_init and module_exit.
+ */
+#define module_isa_driver_with_irq(__isa_driver, __num_isa_dev, __num_irq) \
+module_isa_driver_with_irq_init(__isa_driver, __num_isa_dev, __num_irq); \
+module_isa_driver_exit(__isa_driver)
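A hedged usage sketch of the new macro; my_isa_driver and the two counts are placeholders, and the probe/match callbacks are omitted:

#include <linux/isa.h>

#define MY_NUM_DEV	4
#define MY_NUM_IRQ	4	/* must equal MY_NUM_DEV, or init fails with -EINVAL */

static struct isa_driver my_isa_driver = {
	.driver = {
		.name	= "my_isa",
	},
};

module_isa_driver_with_irq(my_isa_driver, MY_NUM_DEV, MY_NUM_IRQ);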
/**
* max_num_isa_dev() - Maximum possible number registered of an ISA device
diff --git a/include/linux/iversion.h b/include/linux/iversion.h
index 3bfebde5a1a6..e27bd4f55d84 100644
--- a/include/linux/iversion.h
+++ b/include/linux/iversion.h
@@ -123,17 +123,12 @@ inode_peek_iversion_raw(const struct inode *inode)
static inline void
inode_set_max_iversion_raw(struct inode *inode, u64 val)
{
- u64 cur, old;
+ u64 cur = inode_peek_iversion_raw(inode);
- cur = inode_peek_iversion_raw(inode);
- for (;;) {
+ do {
if (cur > val)
break;
- old = atomic64_cmpxchg(&inode->i_version, cur, val);
- if (likely(old == cur))
- break;
- cur = old;
- }
+ } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, val));
}
/**
@@ -177,56 +172,7 @@ inode_set_iversion_queried(struct inode *inode, u64 val)
I_VERSION_QUERIED);
}
-/**
- * inode_maybe_inc_iversion - increments i_version
- * @inode: inode with the i_version that should be updated
- * @force: increment the counter even if it's not necessary?
- *
- * Every time the inode is modified, the i_version field must be seen to have
- * changed by any observer.
- *
- * If "force" is set or the QUERIED flag is set, then ensure that we increment
- * the value, and clear the queried flag.
- *
- * In the common case where neither is set, then we can return "false" without
- * updating i_version.
- *
- * If this function returns false, and no other metadata has changed, then we
- * can avoid logging the metadata.
- */
-static inline bool
-inode_maybe_inc_iversion(struct inode *inode, bool force)
-{
- u64 cur, old, new;
-
- /*
- * The i_version field is not strictly ordered with any other inode
- * information, but the legacy inode_inc_iversion code used a spinlock
- * to serialize increments.
- *
- * Here, we add full memory barriers to ensure that any de-facto
- * ordering with other info is preserved.
- *
- * This barrier pairs with the barrier in inode_query_iversion()
- */
- smp_mb();
- cur = inode_peek_iversion_raw(inode);
- for (;;) {
- /* If flag is clear then we needn't do anything */
- if (!force && !(cur & I_VERSION_QUERIED))
- return false;
-
- /* Since lowest bit is flag, add 2 to avoid it */
- new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT;
-
- old = atomic64_cmpxchg(&inode->i_version, cur, new);
- if (likely(old == cur))
- break;
- cur = old;
- }
- return true;
-}
-
+bool inode_maybe_inc_iversion(struct inode *inode, bool force);
/**
* inode_inc_iversion - forcibly increment i_version
@@ -304,10 +250,10 @@ inode_peek_iversion(const struct inode *inode)
static inline u64
inode_query_iversion(struct inode *inode)
{
- u64 cur, old, new;
+ u64 cur, new;
cur = inode_peek_iversion_raw(inode);
- for (;;) {
+ do {
/* If flag is already set, then no need to swap */
if (cur & I_VERSION_QUERIED) {
/*
@@ -320,11 +266,7 @@ inode_query_iversion(struct inode *inode)
}
new = cur | I_VERSION_QUERIED;
- old = atomic64_cmpxchg(&inode->i_version, cur, new);
- if (likely(old == cur))
- break;
- cur = old;
- }
+ } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
return cur >> I_VERSION_QUERIED_SHIFT;
}
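The conversions above all follow the same shape; a generic, hedged sketch of the atomic64_try_cmpxchg() retry idiom (not tied to i_version), with MY_FLAG as a made-up flag bit:

#include <linux/atomic.h>

#define MY_FLAG		1LL	/* hypothetical low bit used as a flag */

static inline s64 my_set_flag(atomic64_t *v)
{
	s64 old = atomic64_read(v), new;

	/* try_cmpxchg() reloads 'old' with the current value when it fails. */
	do {
		new = old | MY_FLAG;
	} while (!atomic64_try_cmpxchg(v, &old, new));

	return new;
}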
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0b7242370b56..2170e0cc279d 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1662,7 +1662,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
int jbd2_fc_end_commit(journal_t *journal);
int jbd2_fc_end_commit_fallback(journal_t *journal);
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
-int jbd2_submit_inode_data(struct jbd2_inode *jinode);
+int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
int jbd2_fc_release_bufs(journal_t *journal);
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 570831ca9951..4e968ebadce6 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -224,9 +224,10 @@ extern bool arch_jump_label_transform_queue(struct jump_entry *entry,
enum jump_label_type type);
extern void arch_jump_label_transform_apply(void);
extern int jump_label_text_reserved(void *start, void *end);
-extern void static_key_slow_inc(struct static_key *key);
+extern bool static_key_slow_inc(struct static_key *key);
+extern bool static_key_fast_inc_not_disabled(struct static_key *key);
extern void static_key_slow_dec(struct static_key *key);
-extern void static_key_slow_inc_cpuslocked(struct static_key *key);
+extern bool static_key_slow_inc_cpuslocked(struct static_key *key);
extern void static_key_slow_dec_cpuslocked(struct static_key *key);
extern int static_key_count(struct static_key *key);
extern void static_key_enable(struct static_key *key);
@@ -278,11 +279,23 @@ static __always_inline bool static_key_true(struct static_key *key)
return false;
}
-static inline void static_key_slow_inc(struct static_key *key)
+static inline bool static_key_fast_inc_not_disabled(struct static_key *key)
{
+ int v;
+
STATIC_KEY_CHECK_USE(key);
- atomic_inc(&key->enabled);
+ /*
+ * Prevent key->enabled getting negative to follow the same semantics
+ * as for CONFIG_JUMP_LABEL=y, see kernel/jump_label.c comment.
+ */
+ v = atomic_read(&key->enabled);
+ do {
+ if (v < 0 || (v + 1) < 0)
+ return false;
+ } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1)));
+ return true;
}
+#define static_key_slow_inc(key) static_key_fast_inc_not_disabled(key)
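A hedged sketch of a caller that relies on the new boolean return; the key and helper names are hypothetical:

#include <linux/jump_label.h>
#include <linux/errno.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);

static int my_feature_get(void)
{
	/* Fails if the key is disabled or its refcount would overflow. */
	if (!static_key_fast_inc_not_disabled(&my_feature_key.key))
		return -EBUSY;
	return 0;
}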
static inline void static_key_slow_dec(struct static_key *key)
{
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index ad39636e0c3f..0065209cc004 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -15,7 +15,7 @@
#include <asm/sections.h>
-#define KSYM_NAME_LEN 128
+#define KSYM_NAME_LEN 512
#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \
(KSYM_NAME_LEN - 1) + \
2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \
@@ -66,9 +66,12 @@ static inline void *dereference_symbol_descriptor(void *ptr)
}
#ifdef CONFIG_KALLSYMS
+unsigned long kallsyms_sym_address(int idx);
int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
unsigned long),
void *data);
+int kallsyms_on_each_match_symbol(int (*fn)(void *, unsigned long),
+ const char *name, void *data);
/* Lookup the address for a symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name);
@@ -168,6 +171,12 @@ static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct
{
return -EOPNOTSUPP;
}
+
+static inline int kallsyms_on_each_match_symbol(int (*fn)(void *, unsigned long),
+ const char *name, void *data)
+{
+ return -EOPNOTSUPP;
+}
#endif /*CONFIG_KALLSYMS*/
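A hedged sketch of the new match iterator; a non-zero return from the callback stops the walk, and the my_* names are placeholders:

#include <linux/kallsyms.h>

static int my_match_cb(void *data, unsigned long addr)
{
	*(unsigned long *)data = addr;
	return 1;		/* non-zero return ends the iteration */
}

static unsigned long my_lookup_addr(const char *name)
{
	unsigned long addr = 0;

	kallsyms_on_each_match_symbol(my_match_cb, name, &addr);
	return addr;		/* 0 if nothing matched */
}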
static inline void print_ip_sym(const char *loglvl, unsigned long ip)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index b092277bf48d..96c9d56e5510 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -98,19 +98,13 @@ static inline bool kasan_has_integrated_init(void)
#ifdef CONFIG_KASAN
struct kasan_cache {
+#ifdef CONFIG_KASAN_GENERIC
int alloc_meta_offset;
int free_meta_offset;
+#endif
bool is_kmalloc;
};
-slab_flags_t __kasan_never_merge(void);
-static __always_inline slab_flags_t kasan_never_merge(void)
-{
- if (kasan_enabled())
- return __kasan_never_merge();
- return 0;
-}
-
void __kasan_unpoison_range(const void *addr, size_t size);
static __always_inline void kasan_unpoison_range(const void *addr, size_t size)
{
@@ -134,15 +128,6 @@ static __always_inline void kasan_unpoison_pages(struct page *page,
__kasan_unpoison_pages(page, order, init);
}
-void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
- slab_flags_t *flags);
-static __always_inline void kasan_cache_create(struct kmem_cache *cache,
- unsigned int *size, slab_flags_t *flags)
-{
- if (kasan_enabled())
- __kasan_cache_create(cache, size, flags);
-}
-
void __kasan_cache_create_kmalloc(struct kmem_cache *cache);
static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache)
{
@@ -150,14 +135,6 @@ static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache)
__kasan_cache_create_kmalloc(cache);
}
-size_t __kasan_metadata_size(struct kmem_cache *cache);
-static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
-{
- if (kasan_enabled())
- return __kasan_metadata_size(cache);
- return 0;
-}
-
void __kasan_poison_slab(struct slab *slab);
static __always_inline void kasan_poison_slab(struct slab *slab)
{
@@ -269,20 +246,12 @@ static __always_inline bool kasan_check_byte(const void *addr)
#else /* CONFIG_KASAN */
-static inline slab_flags_t kasan_never_merge(void)
-{
- return 0;
-}
static inline void kasan_unpoison_range(const void *address, size_t size) {}
static inline void kasan_poison_pages(struct page *page, unsigned int order,
bool init) {}
static inline void kasan_unpoison_pages(struct page *page, unsigned int order,
bool init) {}
-static inline void kasan_cache_create(struct kmem_cache *cache,
- unsigned int *size,
- slab_flags_t *flags) {}
static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
-static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
static inline void kasan_poison_slab(struct slab *slab) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
void *object) {}
@@ -333,6 +302,11 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
#ifdef CONFIG_KASAN_GENERIC
+size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object);
+slab_flags_t kasan_never_merge(void);
+void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
+ slab_flags_t *flags);
+
void kasan_cache_shrink(struct kmem_cache *cache);
void kasan_cache_shutdown(struct kmem_cache *cache);
void kasan_record_aux_stack(void *ptr);
@@ -340,6 +314,22 @@ void kasan_record_aux_stack_noalloc(void *ptr);
#else /* CONFIG_KASAN_GENERIC */
+/* Tag-based KASAN modes do not use per-object metadata. */
+static inline size_t kasan_metadata_size(struct kmem_cache *cache,
+ bool in_object)
+{
+ return 0;
+}
+/* And thus nothing prevents cache merging. */
+static inline slab_flags_t kasan_never_merge(void)
+{
+ return 0;
+}
+/* And no cache-related metadata initialization is required. */
+static inline void kasan_cache_create(struct kmem_cache *cache,
+ unsigned int *size,
+ slab_flags_t *flags) {}
+
static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
static inline void kasan_record_aux_stack(void *ptr) {}
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 55dc338f6bcd..ee04256f28af 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -56,7 +56,7 @@ static inline void kcov_remote_start_usb(u64 id)
/*
* The softirq flavor of kcov_remote_*() functions is introduced as a temporary
* work around for kcov's lack of nested remote coverage sections support in
- * task context. Adding suport for nested sections is tracked in:
+ * task context. Adding support for nested sections is tracked in:
* https://bugzilla.kernel.org/show_bug.cgi?id=210337
*/
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 367044d7708c..73f5c120def8 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -108,10 +108,12 @@ enum kernfs_node_flag {
KERNFS_HAS_SEQ_SHOW = 0x0040,
KERNFS_HAS_MMAP = 0x0080,
KERNFS_LOCKDEP = 0x0100,
+ KERNFS_HIDDEN = 0x0200,
KERNFS_SUICIDAL = 0x0400,
KERNFS_SUICIDED = 0x0800,
KERNFS_EMPTY_DIR = 0x1000,
KERNFS_HAS_RELEASE = 0x2000,
+ KERNFS_REMOVING = 0x4000,
};
/* @flags for kernfs_create_root() */
@@ -429,6 +431,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
const char *name,
struct kernfs_node *target);
void kernfs_activate(struct kernfs_node *kn);
+void kernfs_show(struct kernfs_node *kn, bool show);
void kernfs_remove(struct kernfs_node *kn);
void kernfs_break_active_protection(struct kernfs_node *kn);
void kernfs_unbreak_active_protection(struct kernfs_node *kn);
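A hedged sketch of the new visibility helper; the caller and its policy are hypothetical:

#include <linux/kernfs.h>

/* Hide a node while its backing data is invalid, then reveal it again. */
static void my_set_node_valid(struct kernfs_node *kn, bool valid)
{
	kernfs_show(kn, valid);	/* false sets KERNFS_HIDDEN, true clears it */
}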
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 13e6c4b58f07..5dd4343c1bbe 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -17,6 +17,7 @@
#include <linux/crash_core.h>
#include <asm/io.h>
+#include <linux/range.h>
#include <uapi/linux/kexec.h>
#include <linux/verification.h>
@@ -240,14 +241,10 @@ static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
/* Alignment required for elf header segment */
#define ELF_CORE_HEADER_ALIGN 4096
-struct crash_mem_range {
- u64 start, end;
-};
-
struct crash_mem {
unsigned int max_nr_ranges;
unsigned int nr_ranges;
- struct crash_mem_range ranges[];
+ struct range ranges[];
};
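A hedged sketch of walking the ranges now that they are plain struct range entries; range_len() is the existing helper from <linux/range.h>, and end addresses are treated as inclusive here:

#include <linux/kexec.h>
#include <linux/range.h>

static u64 my_total_crash_bytes(const struct crash_mem *mem)
{
	u64 total = 0;
	unsigned int i;

	for (i = 0; i < mem->nr_ranges; i++)
		total += range_len(&mem->ranges[i]);
	return total;
}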
extern int crash_exclude_mem_range(struct crash_mem *mem,
@@ -427,7 +424,7 @@ extern int kexec_load_disabled;
extern bool kexec_in_progress;
int crash_shrink_memory(unsigned long new_size);
-size_t crash_get_memory_size(void);
+ssize_t crash_get_memory_size(void);
#ifndef arch_kexec_protect_crashkres
/*
diff --git a/include/linux/key.h b/include/linux/key.h
index 7febc4881363..d27477faf00d 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -88,6 +88,12 @@ enum key_need_perm {
KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */
};
+enum key_lookup_flag {
+ KEY_LOOKUP_CREATE = 0x01,
+ KEY_LOOKUP_PARTIAL = 0x02,
+ KEY_LOOKUP_ALL = (KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL),
+};
+
struct seq_file;
struct user_struct;
struct signal_struct;
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 384f034ae947..f68865e19b0b 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,12 +15,15 @@ extern void __khugepaged_exit(struct mm_struct *mm);
extern void khugepaged_enter_vma(struct vm_area_struct *vma,
unsigned long vm_flags);
extern void khugepaged_min_free_kbytes_update(void);
+extern bool current_is_khugepaged(void);
#ifdef CONFIG_SHMEM
-extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
+extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
+ bool install_pmd);
#else
-static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
- unsigned long addr)
+static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
+ unsigned long addr, bool install_pmd)
{
+ return 0;
}
#endif
@@ -46,14 +49,20 @@ static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
unsigned long vm_flags)
{
}
-static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
- unsigned long addr)
+static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
+ unsigned long addr, bool install_pmd)
{
+ return 0;
}
static inline void khugepaged_min_free_kbytes_update(void)
{
}
+
+static inline bool current_is_khugepaged(void)
+{
+ return false;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_KHUGEPAGED_H */
diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h
new file mode 100644
index 000000000000..c4cae333deec
--- /dev/null
+++ b/include/linux/kmsan-checks.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KMSAN checks to be used for one-off annotations in subsystems.
+ *
+ * Copyright (C) 2017-2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ *
+ */
+
+#ifndef _LINUX_KMSAN_CHECKS_H
+#define _LINUX_KMSAN_CHECKS_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_KMSAN
+
+/**
+ * kmsan_poison_memory() - Mark the memory range as uninitialized.
+ * @address: address to start with.
+ * @size: size of buffer to poison.
+ * @flags: GFP flags for allocations done by this function.
+ *
+ * Until other data is written to this range, KMSAN will treat it as
+ * uninitialized. Error reports for this memory will reference the call site of
+ * kmsan_poison_memory() as origin.
+ */
+void kmsan_poison_memory(const void *address, size_t size, gfp_t flags);
+
+/**
+ * kmsan_unpoison_memory() - Mark the memory range as initialized.
+ * @address: address to start with.
+ * @size: size of buffer to unpoison.
+ *
+ * Until other data is written to this range, KMSAN will treat it as
+ * initialized.
+ */
+void kmsan_unpoison_memory(const void *address, size_t size);
+
+/**
+ * kmsan_check_memory() - Check the memory range for being initialized.
+ * @address: address to start with.
+ * @size: size of buffer to check.
+ *
+ * If any piece of the given range is marked as uninitialized, KMSAN will report
+ * an error.
+ */
+void kmsan_check_memory(const void *address, size_t size);
+
+/**
+ * kmsan_copy_to_user() - Notify KMSAN about a data transfer to userspace.
+ * @to: destination address in the userspace.
+ * @from: source address in the kernel.
+ * @to_copy: number of bytes to copy.
+ * @left: number of bytes not copied.
+ *
+ * If this is a real userspace data transfer, KMSAN checks the bytes that were
+ * actually copied to ensure there was no information leak. If @to belongs to
+ * the kernel space (which is possible for compat syscalls), KMSAN just copies
+ * the metadata.
+ */
+void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
+ size_t left);
+
+#else
+
+static inline void kmsan_poison_memory(const void *address, size_t size,
+ gfp_t flags)
+{
+}
+static inline void kmsan_unpoison_memory(const void *address, size_t size)
+{
+}
+static inline void kmsan_check_memory(const void *address, size_t size)
+{
+}
+static inline void kmsan_copy_to_user(void __user *to, const void *from,
+ size_t to_copy, size_t left)
+{
+}
+
+#endif
+
+#endif /* _LINUX_KMSAN_CHECKS_H */
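A hedged sketch of the one-off annotations in a driver-style transfer path; the hardware hand-off itself is elided:

#include <linux/kmsan-checks.h>

static void my_dev_transfer(void *buf, size_t len)
{
	/* Everything handed to the device must be initialized. */
	kmsan_check_memory(buf, len);

	/* ... hypothetical hardware overwrites 'buf' here ... */

	/* Data written back by the device counts as initialized. */
	kmsan_unpoison_memory(buf, len);
}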
diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
new file mode 100644
index 000000000000..e38ae3c34618
--- /dev/null
+++ b/include/linux/kmsan.h
@@ -0,0 +1,330 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KMSAN API for subsystems.
+ *
+ * Copyright (C) 2017-2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ *
+ */
+#ifndef _LINUX_KMSAN_H
+#define _LINUX_KMSAN_H
+
+#include <linux/dma-direction.h>
+#include <linux/gfp.h>
+#include <linux/kmsan-checks.h>
+#include <linux/types.h>
+
+struct page;
+struct kmem_cache;
+struct task_struct;
+struct scatterlist;
+struct urb;
+
+#ifdef CONFIG_KMSAN
+
+/**
+ * kmsan_task_create() - Initialize KMSAN state for the task.
+ * @task: task to initialize.
+ */
+void kmsan_task_create(struct task_struct *task);
+
+/**
+ * kmsan_task_exit() - Notify KMSAN that a task has exited.
+ * @task: task about to finish.
+ */
+void kmsan_task_exit(struct task_struct *task);
+
+/**
+ * kmsan_init_shadow() - Initialize KMSAN shadow at boot time.
+ *
+ * Allocate and initialize KMSAN metadata for early allocations.
+ */
+void __init kmsan_init_shadow(void);
+
+/**
+ * kmsan_init_runtime() - Initialize KMSAN state and enable KMSAN.
+ */
+void __init kmsan_init_runtime(void);
+
+/**
+ * kmsan_memblock_free_pages() - handle freeing of memblock pages.
+ * @page: struct page to free.
+ * @order: order of @page.
+ *
+ * Freed pages are either returned to buddy allocator or held back to be used
+ * as metadata pages.
+ */
+bool __init kmsan_memblock_free_pages(struct page *page, unsigned int order);
+
+/**
+ * kmsan_alloc_page() - Notify KMSAN about an alloc_pages() call.
+ * @page: struct page pointer returned by alloc_pages().
+ * @order: order of allocated struct page.
+ * @flags: GFP flags used by alloc_pages()
+ *
+ * KMSAN marks 1<<@order pages starting at @page as uninitialized, unless
+ * @flags contain __GFP_ZERO.
+ */
+void kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags);
+
+/**
+ * kmsan_free_page() - Notify KMSAN about a free_pages() call.
+ * @page: struct page pointer passed to free_pages().
+ * @order: order of deallocated struct page.
+ *
+ * KMSAN marks freed memory as uninitialized.
+ */
+void kmsan_free_page(struct page *page, unsigned int order);
+
+/**
+ * kmsan_copy_page_meta() - Copy KMSAN metadata between two pages.
+ * @dst: destination page.
+ * @src: source page.
+ *
+ * KMSAN copies the contents of metadata pages for @src into the metadata pages
+ * for @dst. If @dst has no associated metadata pages, nothing happens.
+ * If @src has no associated metadata pages, @dst metadata pages are unpoisoned.
+ */
+void kmsan_copy_page_meta(struct page *dst, struct page *src);
+
+/**
+ * kmsan_slab_alloc() - Notify KMSAN about a slab allocation.
+ * @s: slab cache the object belongs to.
+ * @object: object pointer.
+ * @flags: GFP flags passed to the allocator.
+ *
+ * Depending on cache flags and GFP flags, KMSAN sets up the metadata of the
+ * newly created object, marking it as initialized or uninitialized.
+ */
+void kmsan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
+
+/**
+ * kmsan_slab_free() - Notify KMSAN about a slab deallocation.
+ * @s: slab cache the object belongs to.
+ * @object: object pointer.
+ *
+ * KMSAN marks the freed object as uninitialized.
+ */
+void kmsan_slab_free(struct kmem_cache *s, void *object);
+
+/**
+ * kmsan_kmalloc_large() - Notify KMSAN about a large slab allocation.
+ * @ptr: object pointer.
+ * @size: object size.
+ * @flags: GFP flags passed to the allocator.
+ *
+ * Similar to kmsan_slab_alloc(), but for large allocations.
+ */
+void kmsan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
+
+/**
+ * kmsan_kfree_large() - Notify KMSAN about a large slab deallocation.
+ * @ptr: object pointer.
+ *
+ * Similar to kmsan_slab_free(), but for large allocations.
+ */
+void kmsan_kfree_large(const void *ptr);
+
+/**
+ * kmsan_map_kernel_range_noflush() - Notify KMSAN about a vmap.
+ * @start: start of vmapped range.
+ * @end: end of vmapped range.
+ * @prot: page protection flags used for vmap.
+ * @pages: array of pages.
+ * @page_shift: page_shift passed to vmap_range_noflush().
+ *
+ * KMSAN maps shadow and origin pages of @pages into contiguous ranges in
+ * vmalloc metadata address range.
+ */
+void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+ pgprot_t prot, struct page **pages,
+ unsigned int page_shift);
+
+/**
+ * kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
+ * @start: start of vunmapped range.
+ * @end: end of vunmapped range.
+ *
+ * KMSAN unmaps the contiguous metadata ranges created by
+ * kmsan_map_kernel_range_noflush().
+ */
+void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end);
+
+/**
+ * kmsan_ioremap_page_range() - Notify KMSAN about a ioremap_page_range() call.
+ * @addr: range start.
+ * @end: range end.
+ * @phys_addr: physical range start.
+ * @prot: page protection flags used for ioremap_page_range().
+ * @page_shift: page_shift argument passed to vmap_range_noflush().
+ *
+ * KMSAN creates new metadata pages for the physical pages mapped into the
+ * virtual memory.
+ */
+void kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int page_shift);
+
+/**
+ * kmsan_iounmap_page_range() - Notify KMSAN about a iounmap_page_range() call.
+ * @start: range start.
+ * @end: range end.
+ *
+ * KMSAN unmaps the metadata pages for the given range and, unlike for
+ * vunmap_page_range(), also deallocates them.
+ */
+void kmsan_iounmap_page_range(unsigned long start, unsigned long end);
+
+/**
+ * kmsan_handle_dma() - Handle a DMA data transfer.
+ * @page: first page of the buffer.
+ * @offset: offset of the buffer within the first page.
+ * @size: buffer size.
+ * @dir: one of possible dma_data_direction values.
+ *
+ * Depending on @dir, KMSAN:
+ * * checks the buffer, if it is copied to device;
+ * * initializes the buffer, if it is copied from device;
+ * * does both, if this is a DMA_BIDIRECTIONAL transfer.
+ */
+void kmsan_handle_dma(struct page *page, size_t offset, size_t size,
+ enum dma_data_direction dir);
+
+/**
+ * kmsan_handle_dma_sg() - Handle a DMA transfer using scatterlist.
+ * @sg: scatterlist holding DMA buffers.
+ * @nents: number of scatterlist entries.
+ * @dir: one of possible dma_data_direction values.
+ *
+ * Depending on @dir, KMSAN:
+ * * checks the buffers in the scatterlist, if they are copied to device;
+ * * initializes the buffers, if they are copied from device;
+ * * does both, if this is a DMA_BIDIRECTIONAL transfer.
+ */
+void kmsan_handle_dma_sg(struct scatterlist *sg, int nents,
+ enum dma_data_direction dir);
+
+/**
+ * kmsan_handle_urb() - Handle a USB data transfer.
+ * @urb: struct urb pointer.
+ * @is_out: data transfer direction (true means output to hardware).
+ *
+ * If @is_out is true, KMSAN checks the transfer buffer of @urb. Otherwise,
+ * KMSAN initializes the transfer buffer.
+ */
+void kmsan_handle_urb(const struct urb *urb, bool is_out);
+
+/**
+ * kmsan_unpoison_entry_regs() - Handle pt_regs in low-level entry code.
+ * @regs: struct pt_regs pointer received from assembly code.
+ *
+ * KMSAN unpoisons the contents of the passed pt_regs, preventing potential
+ * false positive reports. Unlike kmsan_unpoison_memory(),
+ * kmsan_unpoison_entry_regs() can be called from the regions where
+ * kmsan_in_runtime() returns true, which is the case in early entry code.
+ */
+void kmsan_unpoison_entry_regs(const struct pt_regs *regs);
+
+#else
+
+static inline void kmsan_init_shadow(void)
+{
+}
+
+static inline void kmsan_init_runtime(void)
+{
+}
+
+static inline bool kmsan_memblock_free_pages(struct page *page,
+ unsigned int order)
+{
+ return true;
+}
+
+static inline void kmsan_task_create(struct task_struct *task)
+{
+}
+
+static inline void kmsan_task_exit(struct task_struct *task)
+{
+}
+
+static inline int kmsan_alloc_page(struct page *page, unsigned int order,
+ gfp_t flags)
+{
+ return 0;
+}
+
+static inline void kmsan_free_page(struct page *page, unsigned int order)
+{
+}
+
+static inline void kmsan_copy_page_meta(struct page *dst, struct page *src)
+{
+}
+
+static inline void kmsan_slab_alloc(struct kmem_cache *s, void *object,
+ gfp_t flags)
+{
+}
+
+static inline void kmsan_slab_free(struct kmem_cache *s, void *object)
+{
+}
+
+static inline void kmsan_kmalloc_large(const void *ptr, size_t size,
+ gfp_t flags)
+{
+}
+
+static inline void kmsan_kfree_large(const void *ptr)
+{
+}
+
+static inline void kmsan_vmap_pages_range_noflush(unsigned long start,
+ unsigned long end,
+ pgprot_t prot,
+ struct page **pages,
+ unsigned int page_shift)
+{
+}
+
+static inline void kmsan_vunmap_range_noflush(unsigned long start,
+ unsigned long end)
+{
+}
+
+static inline void kmsan_ioremap_page_range(unsigned long start,
+ unsigned long end,
+ phys_addr_t phys_addr,
+ pgprot_t prot,
+ unsigned int page_shift)
+{
+}
+
+static inline void kmsan_iounmap_page_range(unsigned long start,
+ unsigned long end)
+{
+}
+
+static inline void kmsan_handle_dma(struct page *page, size_t offset,
+ size_t size, enum dma_data_direction dir)
+{
+}
+
+static inline void kmsan_handle_dma_sg(struct scatterlist *sg, int nents,
+ enum dma_data_direction dir)
+{
+}
+
+static inline void kmsan_handle_urb(const struct urb *urb, bool is_out)
+{
+}
+
+static inline void kmsan_unpoison_entry_regs(const struct pt_regs *regs)
+{
+}
+
+#endif
+
+#endif /* _LINUX_KMSAN_H */
diff --git a/include/linux/kmsan_string.h b/include/linux/kmsan_string.h
new file mode 100644
index 000000000000..7287da6f52ef
--- /dev/null
+++ b/include/linux/kmsan_string.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KMSAN string functions API used in other headers.
+ *
+ * Copyright (C) 2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ *
+ */
+#ifndef _LINUX_KMSAN_STRING_H
+#define _LINUX_KMSAN_STRING_H
+
+/*
+ * KMSAN overrides the default memcpy/memset/memmove implementations in the
+ * kernel, which requires having __msan_XXX function prototypes in several other
+ * headers. Keep them in one place instead of open-coding.
+ */
+void *__msan_memcpy(void *dst, const void *src, size_t size);
+void *__msan_memset(void *s, int c, size_t n);
+void *__msan_memmove(void *dest, const void *src, size_t len);
+
+#endif /* _LINUX_KMSAN_STRING_H */
diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h
new file mode 100644
index 000000000000..8bfa6c98176d
--- /dev/null
+++ b/include/linux/kmsan_types.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * A minimal header declaring types added by KMSAN to existing kernel structs.
+ *
+ * Copyright (C) 2017-2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ *
+ */
+#ifndef _LINUX_KMSAN_TYPES_H
+#define _LINUX_KMSAN_TYPES_H
+
+/* These constants are defined in the MSan LLVM instrumentation pass. */
+#define KMSAN_RETVAL_SIZE 800
+#define KMSAN_PARAM_SIZE 800
+
+struct kmsan_context_state {
+ char param_tls[KMSAN_PARAM_SIZE];
+ char retval_tls[KMSAN_RETVAL_SIZE];
+ char va_arg_tls[KMSAN_PARAM_SIZE];
+ char va_arg_origin_tls[KMSAN_PARAM_SIZE];
+ u64 va_arg_overflow_size_tls;
+ char param_origin_tls[KMSAN_PARAM_SIZE];
+ u32 retval_origin_tls;
+};
+
+#undef KMSAN_PARAM_SIZE
+#undef KMSAN_RETVAL_SIZE
+
+struct kmsan_ctx {
+ struct kmsan_context_state cstate;
+ int kmsan_in_runtime;
+ bool allow_reporting;
+};
+
+#endif /* _LINUX_KMSAN_TYPES_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 55041d2f884d..a0b92be98984 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -103,6 +103,7 @@ struct kprobe {
* this flag is only for optimized_kprobe.
*/
#define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */
+#define KPROBE_FLAG_ON_FUNC_ENTRY 16 /* probe is on the function entry */
/* Has this kprobe gone ? */
static inline bool kprobe_gone(struct kprobe *p)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 0b4f17418f64..7e232ba59b86 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -15,9 +15,6 @@
#include <linux/sched.h>
#include <linux/sched/coredump.h>
-struct stable_node;
-struct mem_cgroup;
-
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1c480b1821e1..915142abdf76 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -151,12 +151,11 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQUEST_NO_ACTION BIT(10)
/*
* Architecture-independent vcpu->requests bit members
- * Bits 4-7 are reserved for more arch-independent bits.
+ * Bits 3-7 are reserved for more arch-independent bits.
*/
#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
-#define KVM_REQ_UNHALT 3
#define KVM_REQUEST_ARCH_BASE 8
/*
@@ -417,7 +416,7 @@ static __always_inline void guest_context_enter_irqoff(void)
*/
if (!context_tracking_guest_enter()) {
instrumentation_begin();
- rcu_virt_note_context_switch(smp_processor_id());
+ rcu_virt_note_context_switch();
instrumentation_end();
}
}
@@ -656,12 +655,12 @@ struct kvm_irq_routing_table {
};
#endif
-#ifndef KVM_PRIVATE_MEM_SLOTS
-#define KVM_PRIVATE_MEM_SLOTS 0
+#ifndef KVM_INTERNAL_MEM_SLOTS
+#define KVM_INTERNAL_MEM_SLOTS 0
#endif
#define KVM_MEM_SLOTS_NUM SHRT_MAX
-#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS)
+#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS)
#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
@@ -765,10 +764,10 @@ struct kvm {
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
struct mmu_notifier mmu_notifier;
- unsigned long mmu_notifier_seq;
- long mmu_notifier_count;
- unsigned long mmu_notifier_range_start;
- unsigned long mmu_notifier_range_end;
+ unsigned long mmu_invalidate_seq;
+ long mmu_invalidate_in_progress;
+ unsigned long mmu_invalidate_range_start;
+ unsigned long mmu_invalidate_range_end;
#endif
struct list_head devices;
u64 manual_dirty_log_protect;
@@ -777,6 +776,7 @@ struct kvm {
struct srcu_struct srcu;
struct srcu_struct irq_srcu;
pid_t userspace_pid;
+ bool override_halt_poll_ns;
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
bool vm_bugged;
@@ -1241,8 +1241,18 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
/**
- * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a
- * given guest physical address.
+ * kvm_gpc_init - initialize gfn_to_pfn_cache.
+ *
+ * @gpc: struct gfn_to_pfn_cache object.
+ *
+ * This sets up a gfn_to_pfn_cache by initializing locks. Note, the cache must
+ * be zero-allocated (or zeroed by the caller before init).
+ */
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
+
+/**
+ * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
+ * physical address.
*
* @kvm: pointer to kvm instance.
* @gpc: struct gfn_to_pfn_cache object.
@@ -1266,9 +1276,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
* kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
* accessing the target page.
*/
-int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
- struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
- gpa_t gpa, unsigned long len);
+int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+ struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+ gpa_t gpa, unsigned long len);
/**
* kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
@@ -1325,7 +1335,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
/**
- * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache.
+ * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
*
* @kvm: pointer to kvm instance.
* @gpc: struct gfn_to_pfn_cache object.
@@ -1333,7 +1343,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
* This removes a cache from the @kvm's list to be processed on MMU notifier
* invocation.
*/
-void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
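A hedged sketch of the renamed cache lifecycle; KVM_HOST_USES_PFN is assumed to be the desired usage flag and the my_* names are placeholders:

#include <linux/kvm_host.h>

static int my_cache_setup(struct kvm *kvm, struct kvm_vcpu *vcpu,
			  struct gfn_to_pfn_cache *gpc, gpa_t gpa)
{
	kvm_gpc_init(gpc);	/* once, on a zero-initialized cache */
	return kvm_gpc_activate(kvm, gpc, vcpu, KVM_HOST_USES_PFN,
				gpa, PAGE_SIZE);
}

static void my_cache_teardown(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
	kvm_gpc_deactivate(kvm, gpc);
}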
void kvm_sigset_activate(struct kvm_vcpu *vcpu);
void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
@@ -1357,10 +1367,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
-void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
- unsigned long end);
-void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
- unsigned long end);
+void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
+ unsigned long end);
+void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
+ unsigned long end);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@@ -1391,6 +1401,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
+long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg);
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
@@ -1907,42 +1919,44 @@ extern const struct kvm_stats_header kvm_vcpu_stats_header;
extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
-static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
+static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
{
- if (unlikely(kvm->mmu_notifier_count))
+ if (unlikely(kvm->mmu_invalidate_in_progress))
return 1;
/*
- * Ensure the read of mmu_notifier_count happens before the read
- * of mmu_notifier_seq. This interacts with the smp_wmb() in
- * mmu_notifier_invalidate_range_end to make sure that the caller
- * either sees the old (non-zero) value of mmu_notifier_count or
- * the new (incremented) value of mmu_notifier_seq.
- * PowerPC Book3s HV KVM calls this under a per-page lock
- * rather than under kvm->mmu_lock, for scalability, so
- * can't rely on kvm->mmu_lock to keep things ordered.
+ * Ensure the read of mmu_invalidate_in_progress happens before
+ * the read of mmu_invalidate_seq. This interacts with the
+ * smp_wmb() in mmu_notifier_invalidate_range_end to make sure
+ * that the caller either sees the old (non-zero) value of
+ * mmu_invalidate_in_progress or the new (incremented) value of
+ * mmu_invalidate_seq.
+ *
+ * PowerPC Book3s HV KVM calls this under a per-page lock rather
+ * than under kvm->mmu_lock, for scalability, so can't rely on
+ * kvm->mmu_lock to keep things ordered.
*/
smp_rmb();
- if (kvm->mmu_notifier_seq != mmu_seq)
+ if (kvm->mmu_invalidate_seq != mmu_seq)
return 1;
return 0;
}
-static inline int mmu_notifier_retry_hva(struct kvm *kvm,
- unsigned long mmu_seq,
- unsigned long hva)
+static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
+ unsigned long mmu_seq,
+ unsigned long hva)
{
lockdep_assert_held(&kvm->mmu_lock);
/*
- * If mmu_notifier_count is non-zero, then the range maintained by
- * kvm_mmu_notifier_invalidate_range_start contains all addresses that
- * might be being invalidated. Note that it may include some false
+ * If mmu_invalidate_in_progress is non-zero, then the range maintained
+ * by kvm_mmu_notifier_invalidate_range_start contains all addresses
+ * that might be being invalidated. Note that it may include some false
* positives, due to shortcuts when handing concurrent invalidations.
*/
- if (unlikely(kvm->mmu_notifier_count) &&
- hva >= kvm->mmu_notifier_range_start &&
- hva < kvm->mmu_notifier_range_end)
+ if (unlikely(kvm->mmu_invalidate_in_progress) &&
+ hva >= kvm->mmu_invalidate_range_start &&
+ hva < kvm->mmu_invalidate_range_end)
return 1;
- if (kvm->mmu_notifier_seq != mmu_seq)
+ if (kvm->mmu_invalidate_seq != mmu_seq)
return 1;
return 0;
}
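A hedged sketch of the usual "snapshot, translate, recheck under mmu_lock" pattern these renamed helpers support; the lock flavour and the translation step are arch specific and elided:

#include <linux/kvm_host.h>

static int my_map_fault(struct kvm *kvm, unsigned long hva)
{
	unsigned long mmu_seq = kvm->mmu_invalidate_seq;
	int ret = -EAGAIN;

	smp_rmb();	/* pairs with the invalidate-side barriers */

	/* ... translate hva to a pfn without holding mmu_lock ... */

	write_lock(&kvm->mmu_lock);
	if (!mmu_invalidate_retry_hva(kvm, mmu_seq, hva)) {
		/* ... install the mapping ... */
		ret = 0;
	}
	write_unlock(&kvm->mmu_lock);
	return ret;	/* -EAGAIN means the caller should retry the fault */
}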
@@ -2246,6 +2260,19 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
/*
+ * If more than one page is being (un)accounted, @virt must be the address of
+ * the first page of a block of pages that were allocated together (i.e.
+ * accounted together).
+ *
+ * kvm_account_pgtable_pages() is thread-safe because mod_lruvec_page_state()
+ * is thread-safe.
+ */
+static inline void kvm_account_pgtable_pages(void *virt, int nr)
+{
+ mod_lruvec_page_state(virt_to_page(virt), NR_SECONDARY_PAGETABLE, nr);
+}
+
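A hedged sketch of pairing the accounting helper with page-table page alloc/free; the GFP flags and single-page assumption are illustrative:

#include <linux/gfp.h>

static void *my_alloc_pgtable_page(void)
{
	void *virt = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);

	if (virt)
		kvm_account_pgtable_pages(virt, +1);
	return virt;
}

static void my_free_pgtable_page(void *virt)
{
	kvm_account_pgtable_pages(virt, -1);
	free_page((unsigned long)virt);
}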
+/*
* This defines how many reserved entries we want to keep before we
* kick the vcpu to the userspace to avoid dirty ring full. This
* value can be tuned to higher if e.g. PML is enabled on the host.
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0269ff114f5a..c9149ebe7423 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -101,7 +101,7 @@ enum {
ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */
ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */
ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */
- ATA_DFLAG_NCQ_PRIO_ENABLE = (1 << 21), /* Priority cmds sent to dev */
+ ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */
ATA_DFLAG_INIT_MASK = (1 << 24) - 1,
ATA_DFLAG_DETACH = (1 << 24),
@@ -1043,6 +1043,11 @@ static inline int ata_port_is_dummy(struct ata_port *ap)
return ap->ops == &ata_dummy_port_ops;
}
+static inline bool ata_port_is_frozen(const struct ata_port *ap)
+{
+ return ap->pflags & ATA_PFLAG_FROZEN;
+}
+
extern int ata_std_prereset(struct ata_link *link, unsigned long deadline);
extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
int (*check_ready)(struct ata_link *link));
@@ -1136,8 +1141,8 @@ extern int ata_scsi_slave_config(struct scsi_device *sdev);
extern void ata_scsi_slave_destroy(struct scsi_device *sdev);
extern int ata_scsi_change_queue_depth(struct scsi_device *sdev,
int queue_depth);
-extern int __ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev,
- int queue_depth);
+extern int ata_change_queue_depth(struct ata_port *ap, struct ata_device *dev,
+ struct scsi_device *sdev, int queue_depth);
extern struct ata_device *ata_dev_pair(struct ata_device *adev);
extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev);
extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap);
@@ -1382,7 +1387,8 @@ extern const struct attribute_group *ata_common_sdev_groups[];
.proc_name = drv_name, \
.slave_destroy = ata_scsi_slave_destroy, \
.bios_param = ata_std_bios_param, \
- .unlock_native_capacity = ata_scsi_unlock_native_capacity
+ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\
+ .max_sectors = ATA_MAX_SECTORS_LBA48
#define ATA_SUBBASE_SHT(drv_name) \
__ATA_BASE_SHT(drv_name), \
@@ -1912,8 +1918,6 @@ extern void ata_sff_dev_select(struct ata_port *ap, unsigned int device);
extern u8 ata_sff_check_status(struct ata_port *ap);
extern void ata_sff_pause(struct ata_port *ap);
extern void ata_sff_dma_pause(struct ata_port *ap);
-extern int ata_sff_busy_sleep(struct ata_port *ap,
- unsigned long timeout_pat, unsigned long timeout);
extern int ata_sff_wait_ready(struct ata_link *link, unsigned long deadline);
extern void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf);
extern void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index c74acfa1a3fe..af38252ad704 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -35,6 +35,11 @@ enum {
NDD_WORK_PENDING = 4,
/* dimm supports namespace labels */
NDD_LABELING = 6,
+ /*
+ * dimm contents have changed requiring invalidation of CPU caches prior
+ * to activation of a region that includes this device
+ */
+ NDD_INCOHERENT = 7,
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
@@ -183,6 +188,8 @@ struct nvdimm_security_ops {
int (*overwrite)(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data);
int (*query_overwrite)(struct nvdimm *nvdimm);
+ int (*disable_master)(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data);
};
enum nvdimm_fwa_state {
diff --git a/include/linux/linear_range.h b/include/linux/linear_range.h
index fd3d0b358f22..2e4f4c3539c0 100644
--- a/include/linux/linear_range.h
+++ b/include/linux/linear_range.h
@@ -26,6 +26,17 @@ struct linear_range {
unsigned int step;
};
+#define LINEAR_RANGE(_min, _min_sel, _max_sel, _step) \
+ { \
+ .min = _min, \
+ .min_sel = _min_sel, \
+ .max_sel = _max_sel, \
+ .step = _step, \
+ }
+
+#define LINEAR_RANGE_IDX(_idx, _min, _min_sel, _max_sel, _step) \
+ [_idx] = LINEAR_RANGE(_min, _min_sel, _max_sel, _step)
+
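A hedged sketch of a selector table built with the new initializers; the voltages and step sizes are made up:

#include <linux/linear_range.h>

static const struct linear_range my_volt_ranges[] = {
	LINEAR_RANGE(500000, 0x00, 0x7f, 6250),		/* from 500 mV, 6.25 mV steps */
	LINEAR_RANGE(1300000, 0x80, 0xff, 12500),	/* from 1.3 V, 12.5 mV steps */
};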
unsigned int linear_range_values_in_range(const struct linear_range *r);
unsigned int linear_range_values_in_range_array(const struct linear_range *r,
int ranges);
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 07add7882a5d..c9afcdd9324c 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -199,7 +199,6 @@ struct lru_cache {
unsigned long flags;
- void *lc_private;
const char *name;
/* nr_elements there */
@@ -241,7 +240,6 @@ extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
unsigned e_count, size_t e_size, size_t e_off);
extern void lc_reset(struct lru_cache *lc);
extern void lc_destroy(struct lru_cache *lc);
-extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
extern void lc_del(struct lru_cache *lc, struct lc_element *element);
extern struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr);
@@ -297,6 +295,5 @@ extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
container_of(ptr, type, member)
extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i);
-extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e);
#endif
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 806448173033..ed6cb2ac55fa 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -145,6 +145,12 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name)
LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry)
LSM_HOOK(int, 0, inode_removexattr, struct user_namespace *mnt_userns,
struct dentry *dentry, const char *name)
+LSM_HOOK(int, 0, inode_set_acl, struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name, struct posix_acl *kacl)
+LSM_HOOK(int, 0, inode_get_acl, struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name)
+LSM_HOOK(int, 0, inode_remove_acl, struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name)
LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry)
LSM_HOOK(int, 0, inode_killpriv, struct user_namespace *mnt_userns,
struct dentry *dentry)
@@ -177,6 +183,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk,
struct fown_struct *fown, int sig)
LSM_HOOK(int, 0, file_receive, struct file *file)
LSM_HOOK(int, 0, file_open, struct file *file)
+LSM_HOOK(int, 0, file_truncate, struct file *file)
LSM_HOOK(int, 0, task_alloc, struct task_struct *task,
unsigned long clone_flags)
LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task)
@@ -224,6 +231,7 @@ LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2,
unsigned long arg3, unsigned long arg4, unsigned long arg5)
LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p,
struct inode *inode)
+LSM_HOOK(int, 0, userns_create, const struct cred *cred)
LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag)
LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp,
u32 *secid)
@@ -253,7 +261,7 @@ LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops,
LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb)
LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry,
struct inode *inode)
-LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, char *name,
+LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name,
char **value)
LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size)
LSM_HOOK(int, 0, ismaclabel, const char *name)
@@ -301,7 +309,7 @@ LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
- char __user *optval, int __user *optlen, unsigned len)
+ sockptr_t optval, sockptr_t optlen, unsigned int len)
LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
struct sk_buff *skb, u32 *secid)
LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
@@ -407,4 +415,5 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event)
#ifdef CONFIG_IO_URING
LSM_HOOK(int, 0, uring_override_creds, const struct cred *new)
LSM_HOOK(int, 0, uring_sqpoll, void)
+LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd)
#endif /* CONFIG_IO_URING */
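A hedged sketch of wiring one of the newly added hooks into an LSM; the my_lsm_* names are hypothetical and the usual DEFINE_LSM boilerplate is omitted:

#include <linux/lsm_hooks.h>
#include <linux/kernel.h>

static int my_lsm_file_truncate(struct file *file)
{
	return 0;	/* 0 grants permission; a negative errno denies it */
}

static struct security_hook_list my_lsm_hooks[] = {
	LSM_HOOK_INIT(file_truncate, my_lsm_file_truncate),
};

static int __init my_lsm_init(void)
{
	security_add_hooks(my_lsm_hooks, ARRAY_SIZE(my_lsm_hooks), "my_lsm");
	return 0;
}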
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 84a0d7e02176..0a5ba81f7367 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -92,13 +92,14 @@
* is initialised to NULL by the caller.
* @fc indicates the new filesystem context.
* @src_fc indicates the original filesystem context.
+ * Return 0 on success or a negative error code on failure.
* @fs_context_parse_param:
* Userspace provided a parameter to configure a superblock. The LSM may
* reject it with an error and may use it for itself, in which case it
* should return 0; otherwise it should return -ENOPARAM to pass it on to
* the filesystem.
* @fc indicates the filesystem context.
- * @param The parameter
+ * @param The parameter.
*
* Security hooks for filesystem operations.
*
@@ -118,6 +119,7 @@
* Free memory associated with @mnt_ops.
* @sb_eat_lsm_opts:
* Eat (scan @orig options) and save them in @mnt_opts.
+ * Return 0 on success, negative values on failure.
* @sb_statfs:
* Check permission before obtaining filesystem statistics for the @mnt
* mountpoint.
@@ -136,31 +138,24 @@
* @flags contains the mount flags.
* @data contains the filesystem-specific data.
* Return 0 if permission is granted.
- * @sb_copy_data:
- * Allow mount option data to be copied prior to parsing by the filesystem,
- * so that the security module can extract security-specific mount
- * options cleanly (a filesystem may modify the data e.g. with strsep()).
- * This also allows the original mount data to be stripped of security-
- * specific options to avoid having to make filesystems aware of them.
- * @orig the original mount data copied from userspace.
- * @copy copied data which will be passed to the security module.
- * Returns 0 if the copy was successful.
* @sb_mnt_opts_compat:
* Determine if the new mount options in @mnt_opts are allowed given
* the existing mounted filesystem at @sb.
- * @sb superblock being compared
- * @mnt_opts new mount options
+ * @sb superblock being compared.
+ * @mnt_opts new mount options.
* Return 0 if options are compatible.
* @sb_remount:
* Extracts security system specific mount options and verifies no changes
* are being made to those options.
- * @sb superblock being remounted
+ * @sb superblock being remounted.
* @data contains the filesystem-specific data.
* Return 0 if permission is granted.
* @sb_kern_mount:
- * Mount this @sb if allowed by permissions.
+ * Mount this @sb if allowed by permissions.
+ * Return 0 if permission is granted.
* @sb_show_options:
* Show (print on @m) mount options for this @sb.
+ * Return 0 on success, negative values on failure.
* @sb_umount:
* Check permission before the @mnt file system is unmounted.
* @mnt contains the mounted file system.
@@ -174,31 +169,31 @@
* Return 0 if permission is granted.
* @sb_set_mnt_opts:
* Set the security relevant mount options used for a superblock
- * @sb the superblock to set security mount options for
- * @opts binary data structure containing all lsm mount data
+ * @sb the superblock to set security mount options for.
+ * @opts binary data structure containing all lsm mount data.
+ * Return 0 on success, error on failure.
* @sb_clone_mnt_opts:
* Copy all security options from a given superblock to another
- * @oldsb old superblock which contain information to clone
- * @newsb new superblock which needs filled in
- * @sb_parse_opts_str:
- * Parse a string of security data filling in the opts structure
- * @options string containing all mount options known by the LSM
- * @opts binary data structure usable by the LSM
+ *	@oldsb old superblock which contains information to clone.
+ *	@newsb new superblock which needs to be filled in.
+ * Return 0 on success, error on failure.
* @move_mount:
* Check permission before a mount is moved.
* @from_path indicates the mount that is going to be moved.
* @to_path indicates the mountpoint that will be mounted upon.
+ * Return 0 if permission is granted.
* @dentry_init_security:
* Compute a context for a dentry as the inode is not yet available
* since NFSv4 has no label backed by an EA anyway.
* @dentry dentry to use in calculating the context.
* @mode mode used to determine resource type.
- * @name name of the last path component used to create file
+ * @name name of the last path component used to create file.
* @xattr_name pointer to place the pointer to security xattr name.
 *			Caller does not have to free the resulting pointer. It's
 *			a pointer to a static string.
* @ctx pointer to place the pointer to the resulting context in.
* @ctxlen point to place the length of the resulting context.
+ * Return 0 on success, negative values on failure.
* @dentry_create_files_as:
* Compute a context for a dentry as the inode is not yet available
* and set that context in passed in creds so that new files are
@@ -206,9 +201,10 @@
* passed in creds and not the creds of the caller.
* @dentry dentry to use in calculating the context.
* @mode mode used to determine resource type.
- * @name name of the last path component used to create file
- * @old creds which should be used for context calculation
- * @new creds to modify
+ * @name name of the last path component used to create file.
+ * @old creds which should be used for context calculation.
+ * @new creds to modify.
+ * Return 0 on success, error on failure.
*
*
* Security hooks for inode operations.
@@ -236,7 +232,7 @@
* then it should return -EOPNOTSUPP to skip this processing.
* @inode contains the inode structure of the newly created inode.
* @dir contains the inode structure of the parent directory.
- * @qstr contains the last path component of the new object
+ * @qstr contains the last path component of the new object.
* @name will be set to the allocated name suffix (e.g. selinux).
* @value will be set to the allocated attribute value.
* @len will be set to the length of the value.
@@ -247,9 +243,9 @@
* Set up the incore security field for the new anonymous inode
* and return whether the inode creation is permitted by the security
* module or not.
- * @inode contains the inode structure
- * @name name of the anonymous inode class
- * @context_inode optional related inode
+ * @inode contains the inode structure.
+ * @name name of the anonymous inode class.
+ * @context_inode optional related inode.
* Returns 0 on success, -EACCES if the security module denies the
* creation of this inode, or another -errno upon other errors.
* @inode_create:
@@ -365,7 +361,7 @@
* mode is specified in @mode.
* @path contains the path structure of the file to change the mode.
* @mode contains the new DAC's permission, which is a bitmask of
- * constants from <include/uapi/linux/stat.h>
+ * constants from <include/uapi/linux/stat.h>.
* Return 0 if permission is granted.
* @path_chown:
* Check for permission to change owner/group of a file or directory.
@@ -380,6 +376,7 @@
* @path_notify:
* Check permissions before setting a watch on events as defined by @mask,
* on an object at @path, whose type is defined by @obj_type.
+ * Return 0 if permission is granted.
* @inode_readlink:
* Check the permission to read the symbolic link.
* @dentry contains the dentry structure for the file link.
@@ -387,7 +384,7 @@
* @inode_follow_link:
* Check permission to follow a symbolic link when looking up a pathname.
* @dentry contains the dentry structure for the link.
- * @inode contains the inode, which itself is not stable in RCU-walk
+ * @inode contains the inode, which itself is not stable in RCU-walk.
* @rcu indicates whether we are in RCU-walk mode.
* Return 0 if permission is granted.
* @inode_permission:
@@ -409,7 +406,9 @@
* @attr is the iattr structure containing the new file attributes.
* Return 0 if permission is granted.
* @path_truncate:
- * Check permission before truncating a file.
+ * Check permission before truncating the file indicated by path.
+ * Note that truncation permissions may also be checked based on
+ * already opened files, using the @file_truncate hook.
* @path contains the path structure for the file.
* Return 0 if permission is granted.
* @inode_getattr:
@@ -435,13 +434,25 @@
* Check permission before removing the extended attribute
* identified by @name for @dentry.
* Return 0 if permission is granted.
+ * @inode_set_acl:
+ *	Check permission before setting posix acls.
+ *	The posix acls in @kacl are identified by @acl_name.
+ *	Return 0 if permission is granted.
+ * @inode_get_acl:
+ *	Check permission before getting posix acls.
+ *	The posix acls are identified by @acl_name.
+ *	Return 0 if permission is granted.
+ * @inode_remove_acl:
+ *	Check permission before removing posix acls.
+ *	The posix acls are identified by @acl_name.
+ *	Return 0 if permission is granted.
* @inode_getsecurity:
* Retrieve a copy of the extended attribute representation of the
* security label associated with @name for @inode via @buffer. Note that
* @name is the remainder of the attribute name after the security prefix
- * has been removed. @alloc is used to specify of the call should return a
- * value via the buffer or just the value length Return size of buffer on
- * success.
+ * has been removed. @alloc is used to specify if the call should return a
+ * value via the buffer or just the value length.
+ * Return size of buffer on success.
* @inode_setsecurity:
* Set the security label associated with @name for @inode from the
* extended attribute value @value. @size indicates the size of the
@@ -464,7 +475,7 @@
* @inode_killpriv:
* The setuid bit is being removed. Remove similar security labels.
* Called with the dentry->d_inode->i_mutex held.
- * @mnt_userns: user namespace of the mount
+ * @mnt_userns: user namespace of the mount.
* @dentry is the dentry being changed.
* Return 0 on success. If error is returned, then the operation
* causing setuid bit removal is failed.
@@ -491,20 +502,22 @@
* to abort the copy up. Note that the caller is responsible for reading
* and writing the xattrs as this hook is merely a filter.
* @d_instantiate:
- * Fill in @inode security information for a @dentry if allowed.
+ * Fill in @inode security information for a @dentry if allowed.
* @getprocattr:
- * Read attribute @name for process @p and store it into @value if allowed.
+ * Read attribute @name for process @p and store it into @value if allowed.
+ * Return the length of @value on success, a negative value otherwise.
* @setprocattr:
- * Write (set) attribute @name to @value, size @size if allowed.
+ * Write (set) attribute @name to @value, size @size if allowed.
+ * Return written bytes on success, a negative value otherwise.
*
* Security hooks for kernfs node operations
*
* @kernfs_init_security:
* Initialize the security context of a newly created kernfs node based
* on its own and its parent's attributes.
- *
- * @kn_dir the parent kernfs node
- * @kn the new child kernfs node
+ * @kn_dir the parent kernfs node.
+ * @kn the new child kernfs node.
+ * Return 0 if permission is granted.
*
* Security hooks for file operations
*
@@ -543,11 +556,11 @@
* simple integer value. When @arg represents a user space pointer, it
* should never be used by the security module.
* Return 0 if permission is granted.
- * @mmap_addr :
+ * @mmap_addr:
* Check permissions for a mmap operation at @addr.
* @addr contains virtual address that will be used for the operation.
* Return 0 if permission is granted.
- * @mmap_file :
+ * @mmap_file:
* Check permissions for a mmap operation. The @file may be NULL, e.g.
* if mapping anonymous memory.
* @file contains the file structure for file to map (may be NULL).
@@ -598,10 +611,17 @@
* to receive an open file descriptor via socket IPC.
* @file contains the file structure being received.
* Return 0 if permission is granted.
+ * @file_truncate:
+ * Check permission before truncating a file, i.e. using ftruncate.
+ * Note that truncation permission may also be checked based on the path,
+ * using the @path_truncate hook.
+ * @file contains the file structure for the file.
+ * Return 0 if permission is granted.
* @file_open:
* Save open-time permission checking state for later use upon
* file_permission, and recheck access if anything has changed
* since inode_permission.
+ * Return 0 if permission is granted.
*
* Security hooks for task operations.
*
@@ -619,6 +639,7 @@
* @gfp indicates the atomicity of any memory allocations.
* Only allocate sufficient memory and attach to @cred such that
* cred_transfer() will not get ENOMEM.
+ * Return 0 on success, negative values on failure.
* @cred_free:
* @cred points to the credentials.
* Deallocate and clear the cred->security field in a set of credentials.
@@ -627,6 +648,7 @@
* @old points to the original credentials.
* @gfp indicates the atomicity of any memory allocations.
* Prepare a new set of credentials by copying the data from the old set.
+ * Return 0 on success, negative values on failure.
* @cred_transfer:
* @new points to the new credentials.
* @old points to the original credentials.
@@ -638,7 +660,7 @@
* @kernel_act_as:
* Set the credentials for a kernel service to act as (subjective context).
* @new points to the credentials to be modified.
- * @secid specifies the security ID to be set
+ * @secid specifies the security ID to be set.
* The current task must be the one that nominated @secid.
* Return 0 if successful.
* @kernel_create_files_as:
@@ -651,19 +673,19 @@
* @kernel_module_request:
* Ability to trigger the kernel to automatically upcall to userspace for
* userspace to load a kernel module with the given name.
- * @kmod_name name of the module requested by the kernel
+ * @kmod_name name of the module requested by the kernel.
* Return 0 if successful.
* @kernel_load_data:
* Load data provided by userspace.
- * @id kernel load data identifier
+ * @id kernel load data identifier.
* @contents if a subsequent @kernel_post_load_data will be called.
* Return 0 if permission is granted.
* @kernel_post_load_data:
* Load data provided by a non-file source (usually userspace buffer).
* @buf pointer to buffer containing the data contents.
* @size length of the data contents.
- * @id kernel load data identifier
- * @description a text description of what was loaded, @id-specific
+ * @id kernel load data identifier.
+ * @description a text description of what was loaded, @id-specific.
* Return 0 if permission is granted.
* This must be paired with a prior @kernel_load_data call that had
* @contents set to true.
@@ -671,7 +693,7 @@
* Read a file specified by userspace.
* @file contains the file structure pointing to the file being read
* by the kernel.
- * @id kernel read file identifier
+ * @id kernel read file identifier.
* @contents if a subsequent @kernel_post_read_file will be called.
* Return 0 if permission is granted.
* @kernel_post_read_file:
@@ -680,7 +702,7 @@
* by the kernel.
* @buf pointer to buffer containing the file contents.
* @size length of the file contents.
- * @id kernel read file identifier
+ * @id kernel read file identifier.
* This must be paired with a prior @kernel_read_file call that had
* @contents set to true.
* Return 0 if permission is granted.
@@ -690,7 +712,7 @@
 *	indicates which of the set*uid system calls invoked this hook.
 *	@new is the set of credentials that will be installed. Modifications
* should be made to this rather than to @current->cred.
- * @old is the set of credentials that are being replaces
+ * @old is the set of credentials that are being replaced.
* @flags contains one of the LSM_SETID_* values.
* Return 0 on success.
* @task_fix_setgid:
@@ -742,7 +764,7 @@
* @task_setioprio:
* Check permission before setting the ioprio value of @p to @ioprio.
* @p contains the task_struct of process.
- * @ioprio contains the new ioprio value
+ * @ioprio contains the new ioprio value.
* Return 0 if permission is granted.
* @task_getioprio:
* Check permission before getting the ioprio value of @p.
@@ -806,6 +828,10 @@
* security attributes, e.g. for /proc/pid inodes.
* @p contains the task_struct for the task.
* @inode contains the inode structure for the inode.
+ * @userns_create:
+ * Check permission prior to creating a new user namespace.
+ * @cred points to prepared creds.
+ * Return 0 if successful, otherwise < 0 error code.
*
* Security hooks for Netlink messaging.
*
@@ -869,6 +895,7 @@
* @type contains the requested communications type.
* @protocol contains the requested protocol.
* @kern set to 1 if a kernel socket.
+ * Return 0 if permission is granted.
* @socket_socketpair:
* Check permissions before creating a fresh pair of sockets.
* @socka contains the first socket structure.
@@ -952,14 +979,15 @@
* Must not sleep inside this hook because some callers hold spinlocks.
* @sk contains the sock (not socket) associated with the incoming sk_buff.
* @skb contains the incoming network data.
+ * Return 0 if permission is granted.
* @socket_getpeersec_stream:
* This hook allows the security module to provide peer socket security
* state for unix or connected tcp sockets to userspace via getsockopt
 *	SO_PEERSEC. For tcp sockets this can be meaningful if the
* socket is associated with an ipsec SA.
* @sock is the local socket.
- * @optval userspace memory where the security state is to be copied.
- * @optlen userspace int where the module should copy the actual length
+ * @optval memory where the security state is to be copied.
+ * @optlen memory where the module should copy the actual length
* of the security state.
* @len as input is the maximum length to copy to userspace provided
* by the caller.
@@ -979,6 +1007,7 @@
* @sk_alloc_security:
* Allocate and attach a security structure to the sk->sk_security field,
* which is used to copy security attributes between local stream sockets.
+ * Return 0 on success, error on failure.
* @sk_free_security:
* Deallocate security structure.
* @sk_clone_security:
@@ -991,17 +1020,19 @@
* @inet_conn_request:
* Sets the openreq's sid to socket's sid with MLS portion taken
* from peer sid.
+ * Return 0 if permission is granted.
* @inet_csk_clone:
* Sets the new child socket's sid to the openreq sid.
* @inet_conn_established:
* Sets the connection's peersid to the secmark on skb.
* @secmark_relabel_packet:
- * check if the process should be allowed to relabel packets to
- * the given secid
+ * Check if the process should be allowed to relabel packets to
+ * the given secid.
+ * Return 0 if permission is granted.
* @secmark_refcount_inc:
- * tells the LSM to increment the number of secmark labeling rules loaded
+ * Tells the LSM to increment the number of secmark labeling rules loaded.
* @secmark_refcount_dec:
- * tells the LSM to decrement the number of secmark labeling rules loaded
+ * Tells the LSM to decrement the number of secmark labeling rules loaded.
* @req_classify_flow:
* Sets the flow's sid to the openreq sid.
* @tun_dev_alloc_security:
@@ -1012,21 +1043,25 @@
* @tun_dev_free_security:
* This hook allows a module to free the security structure for a TUN
* device.
- * @security pointer to the TUN device's security structure
+ * @security pointer to the TUN device's security structure.
* @tun_dev_create:
* Check permissions prior to creating a new TUN device.
+ * Return 0 if permission is granted.
* @tun_dev_attach_queue:
* Check permissions prior to attaching to a TUN device queue.
* @security pointer to the TUN device's security structure.
+ * Return 0 if permission is granted.
* @tun_dev_attach:
* This hook can be used by the module to update any security state
* associated with the TUN device's sock structure.
* @sk contains the existing sock structure.
* @security pointer to the TUN device's security structure.
+ * Return 0 if permission is granted.
* @tun_dev_open:
* This hook can be used by the module to update any security state
* associated with the TUN device's security structure.
 *	@security pointer to the TUN device's security structure.
+ * Return 0 if permission is granted.
*
* Security hooks for SCTP
*
@@ -1059,6 +1094,7 @@
* to the security module.
* @asoc pointer to sctp association structure.
* @skb pointer to skbuff of association packet.
+ * Return 0 if permission is granted.
*
* Security hooks for Infiniband
*
@@ -1067,15 +1103,17 @@
* @subnet_prefix the subnet prefix of the port being used.
* @pkey the pkey to be accessed.
* @sec pointer to a security structure.
+ * Return 0 if permission is granted.
* @ib_endport_manage_subnet:
* Check permissions to send and receive SMPs on a end port.
 *	@dev_name the IB device name (e.g. mlx4_0).
* @port_num the port number.
* @sec pointer to a security structure.
+ * Return 0 if permission is granted.
* @ib_alloc_security:
* Allocate a security structure for Infiniband objects.
* @sec pointer to a security structure pointer.
- * Returns 0 on success, non-zero on failure
+ * Returns 0 on success, non-zero on failure.
* @ib_free_security:
* Deallocate an Infiniband security structure.
* @sec contains the security structure to be freed.
@@ -1087,10 +1125,11 @@
* Database used by the XFRM system.
* @sec_ctx contains the security context information being provided by
* the user-level policy update program (e.g., setkey).
+ * @gfp is to specify the context for the allocation.
* Allocate a security structure to the xp->security field; the security
* field is initialized to NULL when the xfrm_policy is allocated.
- * Return 0 if operation was successful (memory to allocate, legal context)
- * @gfp is to specify the context for the allocation
+ * Return 0 if operation was successful (memory to allocate, legal
+ * context).
* @xfrm_policy_clone_security:
* @old_ctx contains an existing xfrm_sec_ctx.
* @new_ctxp contains a new xfrm_sec_ctx being cloned from old.
@@ -1098,11 +1137,12 @@
* information from the old_ctx structure.
* Return 0 if operation was successful (memory to allocate).
* @xfrm_policy_free_security:
- * @ctx contains the xfrm_sec_ctx
+ * @ctx contains the xfrm_sec_ctx.
* Deallocate xp->security.
* @xfrm_policy_delete_security:
* @ctx contains the xfrm_sec_ctx.
* Authorize deletion of xp->security.
+ * Return 0 if permission is granted.
* @xfrm_state_alloc:
* @x contains the xfrm_state being added to the Security Association
* Database by the XFRM system.
@@ -1128,6 +1168,7 @@
* @xfrm_state_delete_security:
* @x contains the xfrm_state.
* Authorize deletion of x->security.
+ * Return 0 if permission is granted.
* @xfrm_policy_lookup:
* @ctx contains the xfrm_sec_ctx for which the access control is being
* checked.
@@ -1156,7 +1197,7 @@
* Permit allocation of a key and assign security data. Note that key does
* not have a serial number assigned at this point.
* @key points to the key.
- * @flags is the allocation flags
+ * @flags is the allocation flags.
* Return 0 if permission is granted, -ve error otherwise.
* @key_free:
* Notification of destruction; free security data.
@@ -1186,8 +1227,8 @@
*
* @ipc_permission:
* Check permissions for access to IPC
- * @ipcp contains the kernel IPC permission structure
- * @flag contains the desired (requested) permission set
+ * @ipcp contains the kernel IPC permission structure.
+ * @flag contains the desired (requested) permission set.
* Return 0 if permission is granted.
* @ipc_getsecid:
* Get the secid associated with the ipc object.
@@ -1332,15 +1373,18 @@
* to @to.
* @from contains the struct cred for the sending process.
* @to contains the struct cred for the receiving process.
+ * Return 0 if permission is granted.
* @binder_transfer_binder:
* Check whether @from is allowed to transfer a binder reference to @to.
* @from contains the struct cred for the sending process.
* @to contains the struct cred for the receiving process.
+ * Return 0 if permission is granted.
* @binder_transfer_file:
* Check whether @from is allowed to transfer @file to @to.
* @from contains the struct cred for the sending process.
* @file contains the struct file being transferred.
* @to contains the struct cred for the receiving process.
+ * Return 0 if permission is granted.
*
* @ptrace_access_check:
* Check permission before allowing the current process to trace the
@@ -1382,32 +1426,39 @@
* Check whether the @tsk process has the @cap capability in the indicated
* credentials.
* @cred contains the credentials to use.
- * @ns contains the user namespace we want the capability in
+ * @ns contains the user namespace we want the capability in.
* @cap contains the capability <include/linux/capability.h>.
- * @opts contains options for the capable check <include/linux/security.h>
+ * @opts contains options for the capable check <include/linux/security.h>.
* Return 0 if the capability is granted for @tsk.
* @quotactl:
- * Check whether the quotactl syscall is allowed for this @sb.
+ * Check whether the quotactl syscall is allowed for this @sb.
+ * Return 0 if permission is granted.
* @quota_on:
- * Check whether QUOTAON is allowed for this @dentry.
+ * Check whether QUOTAON is allowed for this @dentry.
+ * Return 0 if permission is granted.
* @syslog:
* Check permission before accessing the kernel message ring or changing
* logging to the console.
* See the syslog(2) manual page for an explanation of the @type values.
- * @type contains the SYSLOG_ACTION_* constant from <include/linux/syslog.h>
+ * @type contains the SYSLOG_ACTION_* constant from
+ * <include/linux/syslog.h>.
* Return 0 if permission is granted.
* @settime:
* Check permission to change the system time.
* struct timespec64 is defined in <include/linux/time64.h> and timezone
* is defined in <include/linux/time.h>
- * @ts contains new time
- * @tz contains new timezone
+ * @ts contains new time.
+ * @tz contains new timezone.
* Return 0 if permission is granted.
* @vm_enough_memory:
* Check permissions for allocating a new virtual mapping.
* @mm contains the mm struct it is being added to.
* @pages contains the number of pages.
- * Return 0 if permission is granted.
+ * Return 0 if permission is granted by the LSM infrastructure to the
+ * caller. If all LSMs return a positive value, __vm_enough_memory() will
+ * be called with cap_sys_admin set. If at least one LSM returns 0 or
+ * negative, __vm_enough_memory() will be called with cap_sys_admin
+ * cleared.
*
* @ismaclabel:
* Check if the extended attribute specified by @name
@@ -1425,11 +1476,13 @@
* @secid contains the security ID.
* @secdata contains the pointer that stores the converted security
* context.
- * @seclen pointer which contains the length of the data
+ * @seclen pointer which contains the length of the data.
+ * Return 0 on success, error on failure.
* @secctx_to_secid:
* Convert security context to secid.
* @secid contains the pointer to the generated security ID.
* @secdata contains the security context.
+ * Return 0 on success, error on failure.
*
* @release_secctx:
* Release the security context.
@@ -1466,7 +1519,7 @@
* @audit_rule_free:
* Deallocate the LSM audit rule structure previously allocated by
* audit_rule_init.
- * @lsmrule contains the allocated rule
+ * @lsmrule contains the allocated rule.
*
* @inode_invalidate_secctx:
* Notify the security module that it must revalidate the security context
@@ -1483,6 +1536,7 @@
* @inode we wish to set the security context of.
* @ctx contains the string which we wish to set in the inode.
* @ctxlen contains the length of @ctx.
+ * Return 0 on success, error on failure.
*
* @inode_setsecctx:
* Change the security context of an inode. Updates the
@@ -1496,6 +1550,7 @@
* @dentry contains the inode we wish to set the security context of.
* @ctx contains the string which we wish to set in the inode.
* @ctxlen contains the length of @ctx.
+ * Return 0 on success, error on failure.
*
* @inode_getsecctx:
* On success, returns 0 and fills out @ctx and @ctxlen with the security
@@ -1503,6 +1558,7 @@
* @inode we wish to get the security context of.
* @ctx is a pointer in which to place the allocated security context.
* @ctxlen points to the place to put the length of @ctx.
+ * Return 0 on success, error on failure.
*
* Security hooks for the general notification queue:
*
@@ -1510,13 +1566,15 @@
* Check to see if a watch notification can be posted to a particular
* queue.
 *	@w_cred: The credentials of whoever set the watch.
- * @cred: The event-triggerer's credentials
- * @n: The notification being posted
+ * @cred: The event-triggerer's credentials.
+ * @n: The notification being posted.
+ * Return 0 if permission is granted.
*
* @watch_key:
* Check to see if a process is allowed to watch for event notifications
* from a key or keyring.
* @key: The key to watch.
+ * Return 0 if permission is granted.
*
* Security hooks for using the eBPF maps and programs functionalities through
* eBPF syscalls.
@@ -1525,62 +1583,74 @@
 *	Do an initial check for all bpf syscalls after the attribute is copied
 *	into the kernel. The actual security module can implement its own
 *	rules to check the specific cmd it needs.
+ * Return 0 if permission is granted.
*
* @bpf_map:
 *	Do a check when the kernel generates and returns a file descriptor for
 *	eBPF maps.
- *
- * @map: bpf map that we want to access
- * @mask: the access flags
+ * @map: bpf map that we want to access.
+ * @mask: the access flags.
+ * Return 0 if permission is granted.
*
* @bpf_prog:
 *	Do a check when the kernel generates and returns a file descriptor for
 *	eBPF programs.
- *
 *	@prog: bpf prog that userspace wants to use.
+ * Return 0 if permission is granted.
*
* @bpf_map_alloc_security:
* Initialize the security field inside bpf map.
+ * Return 0 on success, error on failure.
*
* @bpf_map_free_security:
* Clean up the security information stored inside bpf map.
*
* @bpf_prog_alloc_security:
* Initialize the security field inside bpf program.
+ * Return 0 on success, error on failure.
*
* @bpf_prog_free_security:
* Clean up the security information stored inside bpf prog.
*
* @locked_down:
- * Determine whether a kernel feature that potentially enables arbitrary
- * code execution in kernel space should be permitted.
- *
- * @what: kernel feature being accessed
+ * Determine whether a kernel feature that potentially enables arbitrary
+ * code execution in kernel space should be permitted.
+ * @what: kernel feature being accessed.
+ * Return 0 if permission is granted.
*
* Security hooks for perf events
*
* @perf_event_open:
- * Check whether the @type of perf_event_open syscall is allowed.
+ * Check whether the @type of perf_event_open syscall is allowed.
+ * Return 0 if permission is granted.
* @perf_event_alloc:
- * Allocate and save perf_event security info.
+ * Allocate and save perf_event security info.
+ * Return 0 on success, error on failure.
* @perf_event_free:
- * Release (free) perf_event security info.
+ * Release (free) perf_event security info.
* @perf_event_read:
- * Read perf_event security info if allowed.
+ * Read perf_event security info if allowed.
+ * Return 0 if permission is granted.
* @perf_event_write:
- * Write perf_event security info if allowed.
+ * Write perf_event security info if allowed.
+ * Return 0 if permission is granted.
*
* Security hooks for io_uring
*
* @uring_override_creds:
- * Check if the current task, executing an io_uring operation, is allowed
- * to override it's credentials with @new.
- *
- * @new: the new creds to use
+ *	Check if the current task, executing an io_uring operation, is allowed
+ *	to override its credentials with @new.
+ * @new: the new creds to use.
+ * Return 0 if permission is granted.
*
* @uring_sqpoll:
- * Check whether the current task is allowed to spawn a io_uring polling
- * thread (IORING_SETUP_SQPOLL).
+ *	Check whether the current task is allowed to spawn an io_uring polling
+ *	thread (IORING_SETUP_SQPOLL).
+ * Return 0 if permission is granted.
+ *
+ * @uring_cmd:
+ * Check whether the file_operations uring_cmd is allowed to run.
+ * Return 0 if permission is granted.
*
*/
union security_list_options {
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
new file mode 100644
index 000000000000..e594db58a0f1
--- /dev/null
+++ b/include/linux/maple_tree.h
@@ -0,0 +1,692 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _LINUX_MAPLE_TREE_H
+#define _LINUX_MAPLE_TREE_H
+/*
+ * Maple Tree - An RCU-safe adaptive tree for storing ranges
+ * Copyright (c) 2018-2022 Oracle
+ * Authors: Liam R. Howlett <Liam.Howlett@Oracle.com>
+ * Matthew Wilcox <willy@infradead.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+/* #define CONFIG_MAPLE_RCU_DISABLED */
+/* #define CONFIG_DEBUG_MAPLE_TREE_VERBOSE */
+
+/*
+ * Allocated nodes are mutable until they have been inserted into the tree,
+ * at which time they cannot change their type until they have been removed
+ * from the tree and an RCU grace period has passed.
+ *
+ * Removed nodes have their ->parent set to point to themselves. RCU readers
+ * check ->parent before relying on the value that they loaded from the
+ * slots array. This lets us reuse the slots array for the RCU head.
+ *
+ * Nodes in the tree point to their parent unless bit 0 is set.
+ */
+#if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64)
+/* 64bit sizes */
+#define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */
+#define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */
+#define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */
+#define MAPLE_ARANGE64_META_MAX 15 /* Out of range for metadata */
+#define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1)
+#else
+/* 32bit sizes */
+#define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */
+#define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */
+#define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */
+#define MAPLE_ARANGE64_META_MAX 31 /* Out of range for metadata */
+#define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2)
+#endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */
+
+#define MAPLE_NODE_MASK 255UL
+
+/*
+ * The node->parent of the root node has bit 0 set and the rest of the pointer
+ * is a pointer to the tree itself. No more bits are available in this pointer
+ * (on m68k, the data structure may only be 2-byte aligned).
+ *
+ * Internal non-root nodes can only have maple_range_* nodes as parents. The
+ * parent pointer is 256B aligned like all other tree nodes. When storing 32
+ * or 64 bit values, the offset can fit into 4 bits. 16 bit values need an
+ * extra bit to store the offset. This extra bit comes from a reuse of the last
+ * bit in the node type. This is possible by using bit 1 to indicate if bit 2
+ * is part of the type or the slot.
+ *
+ * Once the type is decided, the choice between an allocation range type and a
+ * range type is made by examining the immutable tree flags for the
+ * MT_FLAGS_ALLOC_RANGE flag.
+ *
+ * Node types:
+ * 0x??1 = Root
+ * 0x?00 = 16 bit nodes
+ * 0x010 = 32 bit nodes
+ * 0x110 = 64 bit nodes
+ *
+ * Slot size and location in the parent pointer:
+ * type : slot location
+ * 0x??1 : Root
+ * 0x?00 : 16 bit values, type in 0-1, slot in 2-6
+ * 0x010 : 32 bit values, type in 0-2, slot in 3-6
+ * 0x110 : 64 bit values, type in 0-2, slot in 3-6
+ */
+
+/*
+ * This metadata is used to optimize the gap updating code and in reverse
+ * searching for gaps or any other code that needs to find the end of the data.
+ */
+struct maple_metadata {
+ unsigned char end;
+ unsigned char gap;
+};
+
+/*
+ * Leaf nodes do not store pointers to nodes, they store user data. Users may
+ * store almost any bit pattern. As noted above, the optimisation of storing an
+ * entry at 0 in the root pointer cannot be done for data which have the bottom
+ * two bits set to '10'. We also reserve values with the bottom two bits set to
+ * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use. Some APIs
+ * return errnos as a negative errno shifted right by two bits and the bottom
+ * two bits set to '10', and while choosing to store these values in the array
+ * is not an error, it may lead to confusion if you're testing for an error with
+ * mas_is_err().
+ *
+ * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits
+ * 3-6), bit 2 is reserved. That leaves bits 0-1 unused for now.
+ *
+ * In regular B-Tree terms, pivots are called keys. The term pivot is used to
+ * indicate that the tree is specifying ranges. Pivots may appear in the
+ * subtree with an entry attached to the value, whereas keys are unique to a
+ * specific position of a B-tree. Pivot values are inclusive of the slot with
+ * the same index.
+ */
+
+struct maple_range_64 {
+ struct maple_pnode *parent;
+ unsigned long pivot[MAPLE_RANGE64_SLOTS - 1];
+ union {
+ void __rcu *slot[MAPLE_RANGE64_SLOTS];
+ struct {
+ void __rcu *pad[MAPLE_RANGE64_SLOTS - 1];
+ struct maple_metadata meta;
+ };
+ };
+};
+
+/*
+ * At tree creation time, the user can specify that they're willing to trade off
+ * storing fewer entries in a tree in return for storing more information in
+ * each node.
+ *
+ * The maple tree supports recording the largest range of NULL entries available
+ * in this node, also called gaps. This optimises the tree for allocating a
+ * range.
+ */
+struct maple_arange_64 {
+ struct maple_pnode *parent;
+ unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1];
+ void __rcu *slot[MAPLE_ARANGE64_SLOTS];
+ unsigned long gap[MAPLE_ARANGE64_SLOTS];
+ struct maple_metadata meta;
+};
+
+struct maple_alloc {
+ unsigned long total;
+ unsigned char node_count;
+ unsigned int request_count;
+ struct maple_alloc *slot[MAPLE_ALLOC_SLOTS];
+};
+
+struct maple_topiary {
+ struct maple_pnode *parent;
+ struct maple_enode *next; /* Overlaps the pivot */
+};
+
+enum maple_type {
+ maple_dense,
+ maple_leaf_64,
+ maple_range_64,
+ maple_arange_64,
+};
+
+
+/**
+ * DOC: Maple tree flags
+ *
+ * * MT_FLAGS_ALLOC_RANGE - Track gaps in this tree
+ * * MT_FLAGS_USE_RCU - Operate in RCU mode
+ * * MT_FLAGS_HEIGHT_OFFSET - The position of the tree height in the flags
+ * * MT_FLAGS_HEIGHT_MASK - The mask for the maple tree height value
+ * * MT_FLAGS_LOCK_MASK - How the mt_lock is used
+ * * MT_FLAGS_LOCK_IRQ - Acquired irq-safe
+ * * MT_FLAGS_LOCK_BH - Acquired bh-safe
+ * * MT_FLAGS_LOCK_EXTERN - mt_lock is not used
+ *
+ * * MAPLE_HEIGHT_MAX - The largest height that can be stored
+ */
+#define MT_FLAGS_ALLOC_RANGE 0x01
+#define MT_FLAGS_USE_RCU 0x02
+#define MT_FLAGS_HEIGHT_OFFSET 0x02
+#define MT_FLAGS_HEIGHT_MASK 0x7C
+#define MT_FLAGS_LOCK_MASK 0x300
+#define MT_FLAGS_LOCK_IRQ 0x100
+#define MT_FLAGS_LOCK_BH 0x200
+#define MT_FLAGS_LOCK_EXTERN 0x300
+
+#define MAPLE_HEIGHT_MAX 31
+
+
+#define MAPLE_NODE_TYPE_MASK 0x0F
+#define MAPLE_NODE_TYPE_SHIFT 0x03
+
+#define MAPLE_RESERVED_RANGE 4096
+
+#ifdef CONFIG_LOCKDEP
+typedef struct lockdep_map *lockdep_map_p;
+#define mt_lock_is_held(mt) lock_is_held(mt->ma_external_lock)
+#define mt_set_external_lock(mt, lock) \
+ (mt)->ma_external_lock = &(lock)->dep_map
+#else
+typedef struct { /* nothing */ } lockdep_map_p;
+#define mt_lock_is_held(mt) 1
+#define mt_set_external_lock(mt, lock) do { } while (0)
+#endif
+
+/*
+ * If the tree contains a single entry at index 0, it is usually stored in
+ * tree->ma_root. To optimise for the page cache, an entry which ends in '00',
+ * '01' or '11' is stored in the root, but an entry which ends in '10' will be
+ * stored in a node. Bits 3-6 are used to store enum maple_type.
+ *
+ * The flags are used both to store some immutable information about this tree
+ * (set at tree creation time) and dynamic information set under the spinlock.
+ *
+ * Another use of flags is to indicate global states of the tree. This is the
+ * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in
+ * RCU mode. This mode was added to allow the tree to reuse nodes instead of
+ * re-allocating and RCU freeing nodes when there is a single user.
+ */
+struct maple_tree {
+ union {
+ spinlock_t ma_lock;
+ lockdep_map_p ma_external_lock;
+ };
+ void __rcu *ma_root;
+ unsigned int ma_flags;
+};
+
+/**
+ * MTREE_INIT() - Initialize a maple tree
+ * @name: The maple tree name
+ * @__flags: The maple tree flags
+ *
+ */
+#define MTREE_INIT(name, __flags) { \
+ .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \
+ .ma_flags = __flags, \
+ .ma_root = NULL, \
+}
+
+/**
+ * MTREE_INIT_EXT() - Initialize a maple tree with an external lock.
+ * @name: The tree name
+ * @__flags: The maple tree flags
+ * @__lock: The external lock
+ */
+#ifdef CONFIG_LOCKDEP
+#define MTREE_INIT_EXT(name, __flags, __lock) { \
+ .ma_external_lock = &(__lock).dep_map, \
+ .ma_flags = (__flags), \
+ .ma_root = NULL, \
+}
+#else
+#define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags)
+#endif
+
+#define DEFINE_MTREE(name) \
+ struct maple_tree name = MTREE_INIT(name, 0)
+
+#define mtree_lock(mt) spin_lock((&(mt)->ma_lock))
+#define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock))
+
+/*
+ * The Maple Tree squeezes various bits in at various points which aren't
+ * necessarily obvious. Usually, this is done by observing that pointers are
+ * N-byte aligned and thus the bottom log_2(N) bits are available for use. We
+ * don't use the high bits of pointers to store additional information because
+ * we don't know what bits are unused on any given architecture.
+ *
+ * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8
+ * low bits for our own purposes. Nodes are currently of 4 types:
+ * 1. Single pointer (Range is 0-0)
+ * 2. Non-leaf Allocation Range nodes
+ * 3. Non-leaf Range nodes
+ * 4. Leaf Range nodes
+ *
+ * All nodes consist of a number of node slots, pivots, and a parent pointer.
+ */
+
+struct maple_node {
+ union {
+ struct {
+ struct maple_pnode *parent;
+ void __rcu *slot[MAPLE_NODE_SLOTS];
+ };
+ struct {
+ void *pad;
+ struct rcu_head rcu;
+ struct maple_enode *piv_parent;
+ unsigned char parent_slot;
+ enum maple_type type;
+ unsigned char slot_len;
+ unsigned int ma_flags;
+ };
+ struct maple_range_64 mr64;
+ struct maple_arange_64 ma64;
+ struct maple_alloc alloc;
+ };
+};
+
+/*
+ * More complicated stores can cause two nodes to become one or three and
+ * potentially alter the height of the tree. Either half of the tree may need
+ * to be rebalanced against the other. The ma_topiary struct is used to track
+ * which nodes have been 'cut' from the tree so that the change can be done
+ * safely at a later date. This is done to support RCU.
+ */
+struct ma_topiary {
+ struct maple_enode *head;
+ struct maple_enode *tail;
+ struct maple_tree *mtree;
+};
+
+void *mtree_load(struct maple_tree *mt, unsigned long index);
+
+int mtree_insert(struct maple_tree *mt, unsigned long index,
+ void *entry, gfp_t gfp);
+int mtree_insert_range(struct maple_tree *mt, unsigned long first,
+ unsigned long last, void *entry, gfp_t gfp);
+int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long size, unsigned long min,
+ unsigned long max, gfp_t gfp);
+int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long size, unsigned long min,
+ unsigned long max, gfp_t gfp);
+
+int mtree_store_range(struct maple_tree *mt, unsigned long first,
+ unsigned long last, void *entry, gfp_t gfp);
+int mtree_store(struct maple_tree *mt, unsigned long index,
+ void *entry, gfp_t gfp);
+void *mtree_erase(struct maple_tree *mt, unsigned long index);
+
+void mtree_destroy(struct maple_tree *mt);
+void __mt_destroy(struct maple_tree *mt);
+
+/**
+ * mtree_empty() - Determine if a tree has any present entries.
+ * @mt: Maple Tree.
+ *
+ * Context: Any context.
+ * Return: %true if the tree contains only NULL pointers.
+ */
+static inline bool mtree_empty(const struct maple_tree *mt)
+{
+ return mt->ma_root == NULL;
+}
+
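/*
 * Editor's illustrative sketch (not part of this patch): basic use of the
 * normal API declared above. The caller supplies an arbitrary pointer to
 * store; error handling is minimal.
 */
static inline int mtree_basic_example(void *entry)
{
	DEFINE_MTREE(tree);	/* tree protected by its internal spinlock */
	void *found;
	int ret;

	/* Store @entry over the inclusive range [10, 20]. */
	ret = mtree_store_range(&tree, 10, 20, entry, GFP_KERNEL);
	if (ret)
		return ret;

	found = mtree_load(&tree, 15);	/* any index in [10, 20] finds @entry */
	mtree_erase(&tree, 15);		/* erases the entry containing index 15 */
	mtree_destroy(&tree);

	return found == entry ? 0 : -EINVAL;
}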
+/* Advanced API */
+
+/*
+ * The maple state is defined in the struct ma_state and is used to keep track
+ * of information during operations, and even between operations when using the
+ * advanced API.
+ *
+ * If state->node has bit 0 set then it references a tree location which is not
+ * a node (eg the root). If bit 1 is set, the rest of the bits are a negative
+ * errno. Bit 2 (the 'unallocated slots' bit) is clear. Bits 3-6 indicate the
+ * node type.
+ *
+ * state->alloc either has a requested number of nodes or an allocated node. If
+ * state->alloc has a requested number of nodes, the first bit will be set (0x1)
+ * and the remaining bits are the value. If state->alloc is a node, then the
+ * node will be of type maple_alloc. maple_alloc has MAPLE_NODE_SLOTS - 1 for
+ * storing more allocated nodes, a total number of nodes allocated, and the
+ * node_count in this node. node_count is the number of allocated nodes in this
+ * node. The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further
+ * nodes into state->alloc->slot[0]'s node. Nodes are taken from state->alloc
+ * by removing a node from the state->alloc node until state->alloc->node_count
+ * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted
+ * to state->alloc. Nodes are pushed onto state->alloc by putting the current
+ * state->alloc into the pushed node's slot[0].
+ *
+ * The state also contains the implied min/max of the state->node, the depth of
+ * this search, and the offset. The implied min/max are either from the parent
+ * node or are 0-oo for the root node. The depth is incremented or decremented
+ * every time a node is walked down or up. The offset is the slot/pivot of
+ * interest in the node - either for reading or writing.
+ *
+ * When returning a value the maple state index and last respectively contain
+ * the start and end of the range for the entry. Ranges are inclusive in the
+ * Maple Tree.
+ */
+struct ma_state {
+ struct maple_tree *tree; /* The tree we're operating in */
+ unsigned long index; /* The index we're operating on - range start */
+ unsigned long last; /* The last index we're operating on - range end */
+ struct maple_enode *node; /* The node containing this entry */
+ unsigned long min; /* The minimum index of this node - implied pivot min */
+ unsigned long max; /* The maximum index of this node - implied pivot max */
+ struct maple_alloc *alloc; /* Allocated nodes for this operation */
+ unsigned char depth; /* depth of tree descent during write */
+ unsigned char offset;
+ unsigned char mas_flags;
+};
+
+struct ma_wr_state {
+ struct ma_state *mas;
+ struct maple_node *node; /* Decoded mas->node */
+ unsigned long r_min; /* range min */
+ unsigned long r_max; /* range max */
+ enum maple_type type; /* mas->node type */
+ unsigned char offset_end; /* The offset where the write ends */
+ unsigned char node_end; /* mas->node end */
+ unsigned long *pivots; /* mas->node->pivots pointer */
+ unsigned long end_piv; /* The pivot at the offset end */
+ void __rcu **slots; /* mas->node->slots pointer */
+ void *entry; /* The entry to write */
+ void *content; /* The existing entry that is being overwritten */
+};
+
+#define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock))
+#define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock))
+
+
+/*
+ * Special values for ma_state.node.
+ * MAS_START means we have not searched the tree.
+ * MAS_ROOT means we have searched the tree and the entry we found lives in
+ * the root of the tree (ie it has index 0, length 1 and is the only entry in
+ * the tree).
+ * MAS_NONE means we have searched the tree and there is no node in the
+ * tree for this entry. For example, we searched for index 1 in an empty
+ * tree. Or we have a tree which points to a full leaf node and we
+ * searched for an entry which is larger than can be contained in that
+ * leaf node.
+ * MA_ERROR represents an errno. After dropping the lock and attempting
+ * to resolve the error, the walk would have to be restarted from the
+ * top of the tree as the tree may have been modified.
+ */
+#define MAS_START ((struct maple_enode *)1UL)
+#define MAS_ROOT ((struct maple_enode *)5UL)
+#define MAS_NONE ((struct maple_enode *)9UL)
+#define MAS_PAUSE ((struct maple_enode *)17UL)
+#define MA_ERROR(err) \
+ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
+
+#define MA_STATE(name, mt, first, end) \
+ struct ma_state name = { \
+ .tree = mt, \
+ .index = first, \
+ .last = end, \
+ .node = MAS_START, \
+ .min = 0, \
+ .max = ULONG_MAX, \
+ .alloc = NULL, \
+ }
+
+#define MA_WR_STATE(name, ma_state, wr_entry) \
+ struct ma_wr_state name = { \
+ .mas = ma_state, \
+ .content = NULL, \
+ .entry = wr_entry, \
+ }
+
+#define MA_TOPIARY(name, tree) \
+ struct ma_topiary name = { \
+ .head = NULL, \
+ .tail = NULL, \
+ .mtree = tree, \
+ }
+
+void *mas_walk(struct ma_state *mas);
+void *mas_store(struct ma_state *mas, void *entry);
+void *mas_erase(struct ma_state *mas);
+int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp);
+void mas_store_prealloc(struct ma_state *mas, void *entry);
+void *mas_find(struct ma_state *mas, unsigned long max);
+void *mas_find_rev(struct ma_state *mas, unsigned long min);
+int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp);
+bool mas_is_err(struct ma_state *mas);
+
+bool mas_nomem(struct ma_state *mas, gfp_t gfp);
+void mas_pause(struct ma_state *mas);
+void maple_tree_init(void);
+void mas_destroy(struct ma_state *mas);
+int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries);
+
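/*
 * Editor's illustrative sketch (not part of this patch): preallocating nodes
 * for a store so that memory allocation happens before the lock is taken.
 * This assumes the caller has already positioned @mas on the range to write.
 */
static inline int mas_prealloc_example(struct ma_state *mas, void *entry)
{
	int ret;

	/* Allocate worst-case nodes up front, where sleeping is allowed. */
	ret = mas_preallocate(mas, entry, GFP_KERNEL);
	if (ret)
		return ret;

	mas_lock(mas);
	mas_store_prealloc(mas, entry);	/* commits using the preallocated nodes */
	mas_unlock(mas);

	return 0;
}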
+void *mas_prev(struct ma_state *mas, unsigned long min);
+void *mas_next(struct ma_state *mas, unsigned long max);
+
+int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max,
+ unsigned long size);
+
+/* Checks if a mas has not found anything */
+static inline bool mas_is_none(struct ma_state *mas)
+{
+ return mas->node == MAS_NONE;
+}
+
+/* Checks if a mas has been paused */
+static inline bool mas_is_paused(struct ma_state *mas)
+{
+ return mas->node == MAS_PAUSE;
+}
+
+void mas_dup_tree(struct ma_state *oldmas, struct ma_state *mas);
+void mas_dup_store(struct ma_state *mas, void *entry);
+
+/*
+ * This finds an empty area from the highest address to the lowest.
+ * AKA the "Topdown" version.
+ */
+int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
+ unsigned long max, unsigned long size);
+/**
+ * mas_reset() - Reset a Maple Tree operation state.
+ * @mas: Maple Tree operation state.
+ *
+ * Resets the error or walk state of the @mas so future walks of the
+ * array will start from the root. Use this if you have dropped the
+ * lock and want to reuse the ma_state.
+ *
+ * Context: Any context.
+ */
+static inline void mas_reset(struct ma_state *mas)
+{
+ mas->node = MAS_START;
+}
+
+/**
+ * mas_for_each() - Iterate over a range of the maple tree.
+ * @__mas: Maple Tree operation state (maple_state)
+ * @__entry: Entry retrieved from the tree
+ * @__max: maximum index to retrieve from the tree
+ *
+ * When returned, mas->index and mas->last will hold the entire range for the
+ * entry.
+ *
+ * Note: may return the zero entry.
+ *
+ */
+#define mas_for_each(__mas, __entry, __max) \
+ while (((__entry) = mas_find((__mas), (__max))) != NULL)
+
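/*
 * Editor's illustrative sketch (not part of this patch): iterating every
 * entry with the advanced API. The internal spinlock is used here; a reader
 * of an RCU-mode tree could use rcu_read_lock() instead (an assumption of
 * this example, not something stated above).
 */
static inline void mas_for_each_example(struct maple_tree *mt)
{
	MA_STATE(mas, mt, 0, 0);	/* walk starts at index 0 */
	void *entry;

	mas_lock(&mas);
	mas_for_each(&mas, entry, ULONG_MAX) {
		/* mas.index and mas.last hold the range of @entry here. */
	}
	mas_unlock(&mas);
}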
+
+/**
+ * mas_set_range() - Set up Maple Tree operation state for a different index.
+ * @mas: Maple Tree operation state.
+ * @start: New start of range in the Maple Tree.
+ * @last: New end of range in the Maple Tree.
+ *
+ * Move the operation state to refer to a different range. This will
+ * have the effect of starting a walk from the top; see mas_next()
+ * to move to an adjacent index.
+ */
+static inline
+void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last)
+{
+ mas->index = start;
+ mas->last = last;
+ mas->node = MAS_START;
+}
+
+/**
+ * mas_set() - Set up Maple Tree operation state for a different index.
+ * @mas: Maple Tree operation state.
+ * @index: New index into the Maple Tree.
+ *
+ * Move the operation state to refer to a different index. This will
+ * have the effect of starting a walk from the top; see mas_next()
+ * to move to an adjacent index.
+ */
+static inline void mas_set(struct ma_state *mas, unsigned long index)
+{
+
+ mas_set_range(mas, index, index);
+}
+
+static inline bool mt_external_lock(const struct maple_tree *mt)
+{
+ return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN;
+}
+
+/**
+ * mt_init_flags() - Initialise an empty maple tree with flags.
+ * @mt: Maple Tree
+ * @flags: maple tree flags.
+ *
+ * If you need to initialise a Maple Tree with special flags (eg, an
+ * allocation tree), use this function.
+ *
+ * Context: Any context.
+ */
+static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags)
+{
+ mt->ma_flags = flags;
+ if (!mt_external_lock(mt))
+ spin_lock_init(&mt->ma_lock);
+ rcu_assign_pointer(mt->ma_root, NULL);
+}
+
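/*
 * Editor's illustrative sketch (not part of this patch): initialising an
 * allocation tree and asking it for a free range. The size and bounds are
 * arbitrary example values.
 */
static inline int mt_alloc_range_example(struct maple_tree *mt, void *entry)
{
	unsigned long start;

	mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);	/* track gaps per node */

	/* Store @entry in a gap of 16 indices somewhere in [0, 1023]. */
	return mtree_alloc_range(mt, &start, entry, 16, 0, 1023, GFP_KERNEL);
}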
+/**
+ * mt_init() - Initialise an empty maple tree.
+ * @mt: Maple Tree
+ *
+ * An empty Maple Tree.
+ *
+ * Context: Any context.
+ */
+static inline void mt_init(struct maple_tree *mt)
+{
+ mt_init_flags(mt, 0);
+}
+
+static inline bool mt_in_rcu(struct maple_tree *mt)
+{
+#ifdef CONFIG_MAPLE_RCU_DISABLED
+ return false;
+#endif
+ return mt->ma_flags & MT_FLAGS_USE_RCU;
+}
+
+/**
+ * mt_clear_in_rcu() - Switch the tree to non-RCU mode.
+ * @mt: The Maple Tree
+ */
+static inline void mt_clear_in_rcu(struct maple_tree *mt)
+{
+ if (!mt_in_rcu(mt))
+ return;
+
+ if (mt_external_lock(mt)) {
+ BUG_ON(!mt_lock_is_held(mt));
+ mt->ma_flags &= ~MT_FLAGS_USE_RCU;
+ } else {
+ mtree_lock(mt);
+ mt->ma_flags &= ~MT_FLAGS_USE_RCU;
+ mtree_unlock(mt);
+ }
+}
+
+/**
+ * mt_set_in_rcu() - Switch the tree to RCU safe mode.
+ * @mt: The Maple Tree
+ */
+static inline void mt_set_in_rcu(struct maple_tree *mt)
+{
+ if (mt_in_rcu(mt))
+ return;
+
+ if (mt_external_lock(mt)) {
+ BUG_ON(!mt_lock_is_held(mt));
+ mt->ma_flags |= MT_FLAGS_USE_RCU;
+ } else {
+ mtree_lock(mt);
+ mt->ma_flags |= MT_FLAGS_USE_RCU;
+ mtree_unlock(mt);
+ }
+}
+
+static inline unsigned int mt_height(const struct maple_tree *mt)
+{
+ return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET;
+}
+
+void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max);
+void *mt_find_after(struct maple_tree *mt, unsigned long *index,
+ unsigned long max);
+void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min);
+void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max);
+
+/**
+ * mt_for_each - Iterate over each entry starting at index until max.
+ * @__tree: The Maple Tree
+ * @__entry: The current entry
+ * @__index: The index to update to track the location in the tree
+ * @__max: The maximum limit for @index
+ *
+ * Note: Will not return the zero entry.
+ */
+#define mt_for_each(__tree, __entry, __index, __max) \
+ for (__entry = mt_find(__tree, &(__index), __max); \
+ __entry; __entry = mt_find_after(__tree, &(__index), __max))
+
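/*
 * Editor's illustrative sketch (not part of this patch): walking a tree with
 * the simple iterator above. No explicit locking is shown, on the assumption
 * that mt_find()/mt_find_after() take the RCU read lock themselves.
 */
static inline unsigned long mt_for_each_example(struct maple_tree *mt)
{
	unsigned long index = 0;
	unsigned long count = 0;
	void *entry;

	mt_for_each(mt, entry, index, ULONG_MAX)
		count++;	/* @index tracks the location of @entry */

	return count;
}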
+
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+extern atomic_t maple_tree_tests_run;
+extern atomic_t maple_tree_tests_passed;
+
+void mt_dump(const struct maple_tree *mt);
+void mt_validate(struct maple_tree *mt);
+void mt_cache_shrink(void);
+#define MT_BUG_ON(__tree, __x) do { \
+ atomic_inc(&maple_tree_tests_run); \
+ if (__x) { \
+ pr_info("BUG at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mt_dump(__tree); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+} while (0)
+#else
+#define MT_BUG_ON(__tree, __x) BUG_ON(__x)
+#endif /* CONFIG_DEBUG_MAPLE_TREE */
+
+#endif /*_LINUX_MAPLE_TREE_H */
diff --git a/include/linux/math64.h b/include/linux/math64.h
index a14f40de1dca..8958f4c005c1 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -29,7 +29,7 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
return dividend / divisor;
}
-/*
+/**
* div_s64_rem - signed 64bit divide with 32bit divisor with remainder
* @dividend: signed 64bit dividend
* @divisor: signed 32bit divisor
@@ -43,7 +43,7 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
return dividend / divisor;
}
-/*
+/**
* div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
* @dividend: unsigned 64bit dividend
* @divisor: unsigned 64bit divisor
@@ -57,7 +57,7 @@ static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
return dividend / divisor;
}
-/*
+/**
* div64_u64 - unsigned 64bit divide with 64bit divisor
* @dividend: unsigned 64bit dividend
* @divisor: unsigned 64bit divisor
@@ -69,7 +69,7 @@ static inline u64 div64_u64(u64 dividend, u64 divisor)
return dividend / divisor;
}
-/*
+/**
* div64_s64 - signed 64bit divide with 64bit divisor
* @dividend: signed 64bit dividend
* @divisor: signed 64bit divisor
@@ -120,6 +120,8 @@ extern s64 div64_s64(s64 dividend, s64 divisor);
* This is the most common 64bit divide and should be used if possible,
* as many 32bit archs can optimize this variant better than a full 64bit
* divide.
+ *
+ * Return: dividend / divisor
*/
#ifndef div_u64
static inline u64 div_u64(u64 dividend, u32 divisor)
@@ -133,6 +135,8 @@ static inline u64 div_u64(u64 dividend, u32 divisor)
* div_s64 - signed 64bit divide with 32bit divisor
* @dividend: signed 64bit dividend
* @divisor: signed 32bit divisor
+ *
+ * Return: dividend / divisor
*/
#ifndef div_s64
static inline s64 div_s64(s64 dividend, s32 divisor)
@@ -284,6 +288,16 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
+/**
+ * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up
+ * @ll: unsigned 64bit dividend
+ * @d: unsigned 64bit divisor
+ *
+ * Divide unsigned 64bit dividend by unsigned 64bit divisor
+ * and round up.
+ *
+ * Return: dividend / divisor rounded up
+ */
#define DIV64_U64_ROUND_UP(ll, d) \
({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); })
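/*
 * Editor's note (illustrative, not part of this patch): the bias makes the
 * division round up, e.g. DIV64_U64_ROUND_UP(10, 4) computes
 * div64_u64(10 + 4 - 1, 4) == div64_u64(13, 4) == 3.
 */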
@@ -300,7 +314,7 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
#define DIV64_U64_ROUND_CLOSEST(dividend, divisor) \
({ u64 _tmp = (divisor); div64_u64((dividend) + _tmp / 2, _tmp); })
-/*
+/**
* DIV_U64_ROUND_CLOSEST - unsigned 64bit divide with 32bit divisor rounded to nearest integer
* @dividend: unsigned 64bit dividend
* @divisor: unsigned 32bit divisor
@@ -313,7 +327,7 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
#define DIV_U64_ROUND_CLOSEST(dividend, divisor) \
({ u32 _tmp = (divisor); div_u64((u64)(dividend) + _tmp / 2, _tmp); })
-/*
+/**
* DIV_S64_ROUND_CLOSEST - signed 64bit divide with 32bit divisor rounded to nearest integer
* @dividend: signed 64bit dividend
* @divisor: signed 32bit divisor
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 2da63fd7b98f..97e64184767d 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -10,6 +10,12 @@
struct mb_cache;
+/* Cache entry flags */
+enum {
+ MBE_REFERENCED_B = 0,
+ MBE_REUSABLE_B
+};
+
struct mb_cache_entry {
/* List of entries in cache - protected by cache->c_list_lock */
struct list_head e_list;
@@ -26,8 +32,7 @@ struct mb_cache_entry {
atomic_t e_refcnt;
/* Key in hash - stable during lifetime of the entry */
u32 e_key;
- u32 e_referenced:1;
- u32 e_reusable:1;
+ unsigned long e_flags;
/* User provided value - stable during lifetime of the entry */
u64 e_value;
};
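/*
 * Editor's illustrative sketch (not part of this patch): with the bitfields
 * replaced by a single e_flags word, callers can use the atomic bit helpers
 * with the MBE_*_B bit numbers. The example_* helper names are hypothetical.
 */
static inline void example_mark_referenced(struct mb_cache_entry *entry)
{
	set_bit(MBE_REFERENCED_B, &entry->e_flags);
}

static inline bool example_is_reusable(struct mb_cache_entry *entry)
{
	return test_bit(MBE_REUSABLE_B, &entry->e_flags);
}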
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 47ad3b104d9e..139d05b26f82 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -10,6 +10,9 @@
#ifndef MDEV_H
#define MDEV_H
+#include <linux/device.h>
+#include <linux/uuid.h>
+
struct mdev_type;
struct mdev_device {
@@ -20,67 +23,67 @@ struct mdev_device {
bool active;
};
-static inline struct mdev_device *to_mdev_device(struct device *dev)
-{
- return container_of(dev, struct mdev_device, dev);
-}
+struct mdev_type {
+ /* set by the driver before calling mdev_register_parent(): */
+ const char *sysfs_name;
+ const char *pretty_name;
-unsigned int mdev_get_type_group_id(struct mdev_device *mdev);
-unsigned int mtype_get_type_group_id(struct mdev_type *mtype);
-struct device *mtype_get_parent_dev(struct mdev_type *mtype);
+ /* set by the core, can be used by drivers */
+ struct mdev_parent *parent;
-/* interface for exporting mdev supported type attributes */
-struct mdev_type_attribute {
- struct attribute attr;
- ssize_t (*show)(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, char *buf);
- ssize_t (*store)(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, const char *buf,
- size_t count);
+ /* internal only */
+ struct kobject kobj;
+ struct kobject *devices_kobj;
};
-#define MDEV_TYPE_ATTR(_name, _mode, _show, _store) \
-struct mdev_type_attribute mdev_type_attr_##_name = \
- __ATTR(_name, _mode, _show, _store)
-#define MDEV_TYPE_ATTR_RW(_name) \
- struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name)
-#define MDEV_TYPE_ATTR_RO(_name) \
- struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name)
-#define MDEV_TYPE_ATTR_WO(_name) \
- struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name)
+/* embedded into the struct device that the mdev devices hang off */
+struct mdev_parent {
+ struct device *dev;
+ struct mdev_driver *mdev_driver;
+ struct kset *mdev_types_kset;
+ /* Synchronize device creation/removal with parent unregistration */
+ struct rw_semaphore unreg_sem;
+ struct mdev_type **types;
+ unsigned int nr_types;
+ atomic_t available_instances;
+};
+
+static inline struct mdev_device *to_mdev_device(struct device *dev)
+{
+ return container_of(dev, struct mdev_device, dev);
+}
/**
* struct mdev_driver - Mediated device driver
+ * @device_api: string to return for the device_api sysfs
+ * @max_instances: maximum number of instances supported (optional)
* @probe: called when new device created
* @remove: called when device removed
- * @supported_type_groups: Attributes to define supported types. It is mandatory
- * to provide supported types.
+ * @get_available: Return the max number of instances that can be created
+ * @show_description: Print a description of the mtype
* @driver: device driver structure
- *
**/
struct mdev_driver {
+ const char *device_api;
+ unsigned int max_instances;
int (*probe)(struct mdev_device *dev);
void (*remove)(struct mdev_device *dev);
- struct attribute_group **supported_type_groups;
+ unsigned int (*get_available)(struct mdev_type *mtype);
+ ssize_t (*show_description)(struct mdev_type *mtype, char *buf);
struct device_driver driver;
};
-extern struct bus_type mdev_bus_type;
-
-int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver);
-void mdev_unregister_device(struct device *dev);
+int mdev_register_parent(struct mdev_parent *parent, struct device *dev,
+ struct mdev_driver *mdev_driver, struct mdev_type **types,
+ unsigned int nr_types);
+void mdev_unregister_parent(struct mdev_parent *parent);
int mdev_register_driver(struct mdev_driver *drv);
void mdev_unregister_driver(struct mdev_driver *drv);
-struct device *mdev_parent_dev(struct mdev_device *mdev);
static inline struct device *mdev_dev(struct mdev_device *mdev)
{
return &mdev->dev;
}
-static inline struct mdev_device *mdev_from_dev(struct device *dev)
-{
- return dev->bus == &mdev_bus_type ? to_mdev_device(dev) : NULL;
-}
#endif /* MDEV_H */
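
A hedged sketch of how a vendor driver registers against the reworked parent/type API; every name below (my_mdev_driver, my_setup, ...) is hypothetical:

#include <linux/kernel.h>
#include <linux/mdev.h>

static struct mdev_driver my_mdev_driver;	/* probe/remove omitted */
static struct mdev_parent my_parent;

static struct mdev_type my_type = {
	.sysfs_name  = "small",
	.pretty_name = "Small example instance",
};
static struct mdev_type *my_types[] = { &my_type };

static int my_setup(struct device *dev)
{
	/* Types are now passed up front instead of via supported_type_groups */
	return mdev_register_parent(&my_parent, dev, &my_mdev_driver,
				    my_types, ARRAY_SIZE(my_types));
}

static void my_teardown(void)
{
	mdev_unregister_parent(&my_parent);
}
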
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 00177567cfef..f7fbbf3069e7 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -488,6 +488,19 @@ static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad,
return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val);
}
+static inline int mdiodev_c45_read(struct mdio_device *mdiodev, int devad,
+ u16 regnum)
+{
+ return mdiobus_c45_read(mdiodev->bus, mdiodev->addr, devad, regnum);
+}
+
+static inline int mdiodev_c45_write(struct mdio_device *mdiodev, u32 devad,
+ u16 regnum, u16 val)
+{
+ return mdiobus_c45_write(mdiodev->bus, mdiodev->addr, devad, regnum,
+ val);
+}
+
int mdiobus_register_device(struct mdio_device *mdiodev);
int mdiobus_unregister_device(struct mdio_device *mdiodev);
bool mdiobus_is_registered_device(struct mii_bus *bus, int addr);
diff --git a/include/linux/mdio/mdio-i2c.h b/include/linux/mdio/mdio-i2c.h
index b1d27f7cd23f..65b550a6fc32 100644
--- a/include/linux/mdio/mdio-i2c.h
+++ b/include/linux/mdio/mdio-i2c.h
@@ -11,6 +11,14 @@ struct device;
struct i2c_adapter;
struct mii_bus;
-struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c);
+enum mdio_i2c_proto {
+ MDIO_I2C_NONE,
+ MDIO_I2C_MARVELL_C22,
+ MDIO_I2C_C45,
+ MDIO_I2C_ROLLBALL,
+};
+
+struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c,
+ enum mdio_i2c_proto protocol);
#endif
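
Callers now choose the access protocol when allocating the bus. A hedged sketch (function and variable names are illustrative):

#include <linux/mdio/mdio-i2c.h>

static struct mii_bus *sfp_make_mdio(struct device *dev,
				     struct i2c_adapter *i2c)
{
	/* e.g. RollBall modules would pass MDIO_I2C_ROLLBALL instead */
	return mdio_i2c_alloc(dev, i2c, MDIO_I2C_MARVELL_C22);
}
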
diff --git a/include/linux/mei_aux.h b/include/linux/mei_aux.h
index 587f25128848..506912ad363b 100644
--- a/include/linux/mei_aux.h
+++ b/include/linux/mei_aux.h
@@ -7,10 +7,22 @@
#include <linux/auxiliary_bus.h>
+/**
+ * struct mei_aux_device - mei auxiliary device
+ * @aux_dev: auxiliary device object
+ * @irq: interrupt driving the mei auxiliary device
+ * @bar: mmio resource bar reserved to mei auxiliary device
+ * @ext_op_mem: resource for extended operational memory
+ *              used in graphics PXP mode.
+ * @slow_firmware: the device has slow underlying firmware;
+ *                 such firmware requires larger operation timeouts.
+ */
struct mei_aux_device {
struct auxiliary_device aux_dev;
int irq;
struct resource bar;
+ struct resource ext_op_mem;
+ bool slow_firmware;
};
#define auxiliary_dev_to_mei_aux_dev(auxiliary_dev) \
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index df1fab44ea5c..fd6e0620658d 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -11,6 +11,7 @@
struct mei_cl_device;
struct mei_device;
+struct scatterlist;
typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev);
@@ -116,6 +117,11 @@ void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data);
int mei_cldev_enable(struct mei_cl_device *cldev);
int mei_cldev_disable(struct mei_cl_device *cldev);
bool mei_cldev_enabled(const struct mei_cl_device *cldev);
+ssize_t mei_cldev_send_gsc_command(struct mei_cl_device *cldev,
+ u8 client_id, u32 fence_id,
+ struct scatterlist *sg_in,
+ size_t total_in_len,
+ struct scatterlist *sg_out);
void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size);
int mei_cldev_dma_unmap(struct mei_cl_device *cldev);
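
A hedged sketch of driving the new GSC command helper with single-entry scatterlists; the client and fence IDs are placeholders:

#include <linux/mei_cl_bus.h>
#include <linux/scatterlist.h>

static ssize_t send_gsc_message(struct mei_cl_device *cldev,
				void *in, size_t in_len,
				void *out, size_t out_len)
{
	struct scatterlist sg_in, sg_out;

	sg_init_one(&sg_in, in, in_len);
	sg_init_one(&sg_out, out, out_len);

	return mei_cldev_send_gsc_command(cldev, 0 /* client_id */,
					  0 /* fence_id */,
					  &sg_in, in_len, &sg_out);
}
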
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4d31ce55b1c0..d3c8203cab6c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -80,29 +80,8 @@ enum mem_cgroup_events_target {
MEM_CGROUP_NTARGETS,
};
-struct memcg_vmstats_percpu {
- /* Local (CPU and cgroup) page state & events */
- long state[MEMCG_NR_STAT];
- unsigned long events[NR_VM_EVENT_ITEMS];
-
- /* Delta calculation for lockless upward propagation */
- long state_prev[MEMCG_NR_STAT];
- unsigned long events_prev[NR_VM_EVENT_ITEMS];
-
- /* Cgroup1: threshold notifications & softlimit tree updates */
- unsigned long nr_page_events;
- unsigned long targets[MEM_CGROUP_NTARGETS];
-};
-
-struct memcg_vmstats {
- /* Aggregated (CPU and subtree) page state & events */
- long state[MEMCG_NR_STAT];
- unsigned long events[NR_VM_EVENT_ITEMS];
-
- /* Pending child counts during tree propagation */
- long state_pending[MEMCG_NR_STAT];
- unsigned long events_pending[NR_VM_EVENT_ITEMS];
-};
+struct memcg_vmstats_percpu;
+struct memcg_vmstats;
struct mem_cgroup_reclaim_iter {
struct mem_cgroup *position;
@@ -185,15 +164,6 @@ struct mem_cgroup_thresholds {
struct mem_cgroup_threshold_ary *spare;
};
-#if defined(CONFIG_SMP)
-struct memcg_padding {
- char x[0];
-} ____cacheline_internodealigned_in_smp;
-#define MEMCG_PADDING(name) struct memcg_padding name
-#else
-#define MEMCG_PADDING(name)
-#endif
-
/*
* Remember four most recent foreign writebacks with dirty pages in this
* cgroup. Inode sharing is expected to be uncommon and, even if we miss
@@ -304,10 +274,10 @@ struct mem_cgroup {
spinlock_t move_lock;
unsigned long move_lock_flags;
- MEMCG_PADDING(_pad1_);
+ CACHELINE_PADDING(_pad1_);
/* memory.stat */
- struct memcg_vmstats vmstats;
+ struct memcg_vmstats *vmstats;
/* memory.events */
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
@@ -326,7 +296,7 @@ struct mem_cgroup {
struct list_head objcg_list;
#endif
- MEMCG_PADDING(_pad2_);
+ CACHELINE_PADDING(_pad2_);
/*
* set > 0 if pages under this cgroup are moving to other cgroup.
@@ -350,14 +320,20 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif
+#ifdef CONFIG_LRU_GEN
+ /* per-memcg mm_struct list */
+ struct lru_gen_mm_list mm_list;
+#endif
+
struct mem_cgroup_per_node *nodeinfo[];
};
/*
- * size of first charge trial. "32" comes from vmscan.c's magic value.
- * TODO: maybe necessary to use big numbers in big irons.
+ * size of first charge trial.
+ * TODO: maybe necessary to use big numbers in big irons, or to size it
+ * dynamically based on the workload.
*/
-#define MEMCG_CHARGE_BATCH 32U
+#define MEMCG_CHARGE_BATCH 64U
extern struct mem_cgroup *root_mem_cgroup;
@@ -444,6 +420,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
+ * - mem_cgroup_trylock_pages()
*
* For a kmem folio a caller should hold an rcu read lock to protect memcg
* associated with a kmem folio from being released.
@@ -505,6 +482,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
+ * - mem_cgroup_trylock_pages()
*
* For a kmem page a caller should hold an rcu read lock to protect memcg
* associated with a kmem page from being released.
@@ -637,28 +615,32 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root,
void mem_cgroup_calculate_protection(struct mem_cgroup *root,
struct mem_cgroup *memcg);
-static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
+ struct mem_cgroup *memcg)
{
/*
* The root memcg doesn't account charges, and doesn't support
- * protection.
+ * protection. The target memcg's protection is ignored, see
+ * mem_cgroup_calculate_protection() and mem_cgroup_protection().
*/
- return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg);
-
+ return mem_cgroup_disabled() || mem_cgroup_is_root(memcg) ||
+ memcg == target;
}
-static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
+ struct mem_cgroup *memcg)
{
- if (!mem_cgroup_supports_protection(memcg))
+ if (mem_cgroup_unprotected(target, memcg))
return false;
return READ_ONCE(memcg->memory.elow) >=
page_counter_read(&memcg->memory);
}
-static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
+ struct mem_cgroup *memcg)
{
- if (!mem_cgroup_supports_protection(memcg))
+ if (mem_cgroup_unprotected(target, memcg))
return false;
return READ_ONCE(memcg->memory.emin) >=
@@ -689,7 +671,7 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
return __mem_cgroup_charge(folio, mm, gfp);
}
-int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
+int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -959,6 +941,23 @@ void unlock_page_memcg(struct page *page);
void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
+/* try to stabilize folio_memcg() for all the pages in a memcg */
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+ rcu_read_lock();
+
+ if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
+ return true;
+
+ rcu_read_unlock();
+ return false;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+ rcu_read_unlock();
+}
+
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
int idx, int val)
@@ -985,21 +984,24 @@ static inline void mod_memcg_page_state(struct page *page,
rcu_read_unlock();
}
-static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
- return READ_ONCE(memcg->vmstats.state[idx]);
-}
+unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
struct mem_cgroup_per_node *pn;
+ long x;
if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- return READ_ONCE(pn->lruvec_stats.state[idx]);
+ x = READ_ONCE(pn->lruvec_stats.state[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
@@ -1211,12 +1213,19 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
{
}
-static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
+ struct mem_cgroup *memcg)
+{
+ return true;
+}
+static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
+ struct mem_cgroup *memcg)
{
return false;
}
-static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
+ struct mem_cgroup *memcg)
{
return false;
}
@@ -1227,7 +1236,7 @@ static inline int mem_cgroup_charge(struct folio *folio,
return 0;
}
-static inline int mem_cgroup_swapin_charge_page(struct page *page,
+static inline int mem_cgroup_swapin_charge_folio(struct folio *folio,
struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
{
return 0;
@@ -1422,6 +1431,18 @@ static inline void folio_memcg_unlock(struct folio *folio)
{
}
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+ /* to match folio_memcg_rcu() */
+ rcu_read_lock();
+ return true;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+ rcu_read_unlock();
+}
+
static inline void mem_cgroup_handle_over_high(void)
{
}
@@ -1768,7 +1789,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
{
struct mem_cgroup *memcg;
- if (mem_cgroup_kmem_disabled())
+ if (!memcg_kmem_enabled())
return;
rcu_read_lock();
@@ -1777,42 +1798,6 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
rcu_read_unlock();
}
-/**
- * get_mem_cgroup_from_obj - get a memcg associated with passed kernel object.
- * @p: pointer to object from which memcg should be extracted. It can be NULL.
- *
- * Retrieves the memory group into which the memory of the pointed kernel
- * object is accounted. If memcg is found, its reference is taken.
- * If a passed kernel object is uncharged, or if proper memcg cannot be found,
- * as well as if mem_cgroup is disabled, NULL is returned.
- *
- * Return: valid memcg pointer with taken reference or NULL.
- */
-static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p)
-{
- struct mem_cgroup *memcg;
-
- rcu_read_lock();
- do {
- memcg = mem_cgroup_from_obj(p);
- } while (memcg && !css_tryget(&memcg->css));
- rcu_read_unlock();
- return memcg;
-}
-
-/**
- * mem_cgroup_or_root - always returns a pointer to a valid memory cgroup.
- * @memcg: pointer to a valid memory cgroup or NULL.
- *
- * If passed argument is not NULL, returns it without any additional checks
- * and changes. Otherwise, root_mem_cgroup is returned.
- *
- * NOTE: root_mem_cgroup can be NULL during early boot.
- */
-static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg)
-{
- return memcg ? memcg : root_mem_cgroup;
-}
#else
static inline bool mem_cgroup_kmem_disabled(void)
{
@@ -1869,15 +1854,6 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
{
}
-static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p)
-{
- return NULL;
-}
-
-static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg)
-{
- return NULL;
-}
#endif /* CONFIG_MEMCG_KMEM */
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
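
The trylock/unlock pair added above is meant to be used in a pattern like the following sketch (the caller name is hypothetical):

#include <linux/memcontrol.h>

static void scan_memcg_pages(struct mem_cgroup *memcg)
{
	if (!mem_cgroup_trylock_pages(memcg))
		return;		/* charges are being moved; bail out */

	/* folio_memcg() is stable for this memcg's pages here */

	mem_cgroup_unlock_pages();
}
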
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
new file mode 100644
index 000000000000..fc9647b1b4f9
--- /dev/null
+++ b/include/linux/memory-tiers.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MEMORY_TIERS_H
+#define _LINUX_MEMORY_TIERS_H
+
+#include <linux/types.h>
+#include <linux/nodemask.h>
+#include <linux/kref.h>
+#include <linux/mmzone.h>
+/*
+ * Each tier covers an abstract distance chunk size of 128
+ */
+#define MEMTIER_CHUNK_BITS 7
+#define MEMTIER_CHUNK_SIZE (1 << MEMTIER_CHUNK_BITS)
+/*
+ * Smaller abstract distance values imply faster (higher) memory tiers. Offset
+ * the DRAM adistance so that we can accommodate devices with a slightly lower
+ * adistance value (slightly faster) than default DRAM adistance to be part of
+ * the same memory tier.
+ */
+#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
+
+struct memory_tier;
+struct memory_dev_type {
+ /* list of memory types that are part of the same tier as this type */
+ struct list_head tier_sibiling;
+ /* abstract distance for this specific memory type */
+ int adistance;
+ /* Nodes of same abstract distance */
+ nodemask_t nodes;
+ struct kref kref;
+};
+
+#ifdef CONFIG_NUMA
+extern bool numa_demotion_enabled;
+struct memory_dev_type *alloc_memory_type(int adistance);
+void destroy_memory_type(struct memory_dev_type *memtype);
+void init_node_memory_type(int node, struct memory_dev_type *default_type);
+void clear_node_memory_type(int node, struct memory_dev_type *memtype);
+#ifdef CONFIG_MIGRATION
+int next_demotion_node(int node);
+void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
+bool node_is_toptier(int node);
+#else
+static inline int next_demotion_node(int node)
+{
+ return NUMA_NO_NODE;
+}
+
+static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
+{
+ *targets = NODE_MASK_NONE;
+}
+
+static inline bool node_is_toptier(int node)
+{
+ return true;
+}
+#endif
+
+#else
+
+#define numa_demotion_enabled false
+/*
+ * The CONFIG_NUMA implementation returns a non-NULL error pointer on failure.
+ */
+static inline struct memory_dev_type *alloc_memory_type(int adistance)
+{
+ return NULL;
+}
+
+static inline void destroy_memory_type(struct memory_dev_type *memtype)
+{
+
+}
+
+static inline void init_node_memory_type(int node, struct memory_dev_type *default_type)
+{
+
+}
+
+static inline void clear_node_memory_type(int node, struct memory_dev_type *memtype)
+{
+
+}
+
+static inline int next_demotion_node(int node)
+{
+ return NUMA_NO_NODE;
+}
+
+static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
+{
+ *targets = NODE_MASK_NONE;
+}
+
+static inline bool node_is_toptier(int node)
+{
+ return true;
+}
+#endif /* CONFIG_NUMA */
+#endif /* _LINUX_MEMORY_TIERS_H */
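
A hedged sketch of how a driver for slower-than-DRAM memory could plug a node into the tiering core; the extra chunk offset and function names are illustrative:

#include <linux/err.h>
#include <linux/memory-tiers.h>

static struct memory_dev_type *slow_memtype;

static int register_slow_node(int nid)
{
	/* larger abstract distance = slower than default DRAM */
	slow_memtype = alloc_memory_type(MEMTIER_ADISTANCE_DRAM +
					 MEMTIER_CHUNK_SIZE);
	if (IS_ERR_OR_NULL(slow_memtype))
		return -ENOMEM;

	init_node_memory_type(nid, slow_memtype);
	return 0;
}

static void unregister_slow_node(int nid)
{
	clear_node_memory_type(nid, slow_memtype);
	destroy_memory_type(slow_memtype);
}
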
diff --git a/include/linux/memory.h b/include/linux/memory.h
index aa619464a1df..31343566c221 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -19,7 +19,6 @@
#include <linux/node.h>
#include <linux/compiler.h>
#include <linux/mutex.h>
-#include <linux/notifier.h>
#define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
@@ -85,6 +84,9 @@ struct memory_block {
unsigned long nr_vmemmap_pages;
struct memory_group *group; /* group (if any) for this block */
struct list_head group_next; /* next block inside memory group */
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+ atomic_long_t nr_hwpoison;
+#endif
};
int arch_get_memory_phys_device(unsigned long start_pfn);
@@ -113,8 +115,13 @@ struct mem_section;
* Priorities for the hotplug memory callback routines (stored in decreasing
* order in the callback chain)
*/
-#define SLAB_CALLBACK_PRI 1
-#define IPC_CALLBACK_PRI 10
+#define DEFAULT_CALLBACK_PRI 0
+#define SLAB_CALLBACK_PRI 1
+#define HMAT_CALLBACK_PRI 2
+#define MM_COMPUTE_BATCH_PRI 10
+#define CPUSET_CALLBACK_PRI 10
+#define MEMTIER_HOTPLUG_PRI 100
+#define KSM_CALLBACK_PRI 100
#ifndef CONFIG_MEMORY_HOTPLUG
static inline void memory_dev_init(void)
@@ -136,9 +143,6 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
{
return 0;
}
-/* These aren't inline functions due to a GCC bug. */
-#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; })
-#define unregister_hotmemory_notifier(nb) ({ (void)(nb); })
#else /* CONFIG_MEMORY_HOTPLUG */
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
@@ -166,8 +170,6 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
{ .notifier_call = fn, .priority = pri };\
register_memory_notifier(&fn##_mem_nb); \
})
-#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
-#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
#ifdef CONFIG_NUMA
void memory_block_add_nid(struct memory_block *mem, int nid,
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e0b2209ab71c..9fcbf5706595 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -11,7 +11,6 @@ struct page;
struct zone;
struct pglist_data;
struct mem_section;
-struct memory_block;
struct memory_group;
struct resource;
struct vmem_altmap;
@@ -44,11 +43,6 @@ extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);
({ \
memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \
})
-/*
- * This definition is just for error path in node hotadd.
- * For node hotremove, we have to replace this.
- */
-#define generic_free_nodedata(pgdat) kfree(pgdat)
extern pg_data_t *node_data[];
static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
@@ -64,9 +58,6 @@ static inline pg_data_t *generic_alloc_nodedata(int nid)
BUG();
return NULL;
}
-static inline void generic_free_nodedata(pg_data_t *pgdat)
-{
-}
static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
{
}
@@ -216,6 +207,22 @@ void put_online_mems(void);
void mem_hotplug_begin(void);
void mem_hotplug_done(void);
+/* See kswapd_is_running() */
+static inline void pgdat_kswapd_lock(pg_data_t *pgdat)
+{
+ mutex_lock(&pgdat->kswapd_lock);
+}
+
+static inline void pgdat_kswapd_unlock(pg_data_t *pgdat)
+{
+ mutex_unlock(&pgdat->kswapd_lock);
+}
+
+static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat)
+{
+ mutex_init(&pgdat->kswapd_lock);
+}
+
#else /* ! CONFIG_MEMORY_HOTPLUG */
#define pfn_to_online_page(pfn) \
({ \
@@ -252,6 +259,10 @@ static inline bool movable_node_is_enabled(void)
{
return false;
}
+
+static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {}
+static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {}
+static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {}
#endif /* ! CONFIG_MEMORY_HOTPLUG */
/*
@@ -333,7 +344,6 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
extern void remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages);
-extern bool is_memblock_offlined(struct memory_block *mem);
extern int sparse_add_section(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 668389b4b53d..d232de7cdc56 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -151,13 +151,6 @@ extern bool mempolicy_in_oom_domain(struct task_struct *tsk,
const nodemask_t *mask);
extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);
-static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
-{
- struct mempolicy *mpol = get_task_policy(current);
-
- return policy_nodemask(gfp, mpol);
-}
-
extern unsigned int mempolicy_slab_node(void);
extern enum zone_type policy_zone;
@@ -189,6 +182,7 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol)
return (pol->mode == MPOL_PREFERRED_MANY);
}
+extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);
#else
@@ -294,11 +288,6 @@ static inline void mpol_put_task_policy(struct task_struct *task)
{
}
-static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
-{
- return NULL;
-}
-
static inline bool mpol_is_preferred_many(struct mempolicy *pol)
{
return false;
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 0c964ac107c2..4aae6c06c5f2 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -30,6 +30,11 @@ static inline bool mempool_initialized(mempool_t *pool)
return pool->elements != NULL;
}
+static inline bool mempool_is_saturated(mempool_t *pool)
+{
+ return READ_ONCE(pool->curr_nr) >= pool->min_nr;
+}
+
void mempool_exit(mempool_t *pool);
int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data,
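
A sketch of the intended use of the new helper: skip the pool bookkeeping when the reserve is already full. The freeing path shown is illustrative, assuming a kmalloc-backed pool:

#include <linux/mempool.h>
#include <linux/slab.h>

static void put_element(mempool_t *pool, void *element)
{
	if (mempool_is_saturated(pool))
		kfree(element);			/* reserve already at min_nr */
	else
		mempool_free(element, pool);
}
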
diff --git a/include/linux/memregion.h b/include/linux/memregion.h
index c04c4fd2e209..bf83363807ac 100644
--- a/include/linux/memregion.h
+++ b/include/linux/memregion.h
@@ -3,6 +3,7 @@
#define _MEMREGION_H_
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/bug.h>
struct memregion_info {
int target_node;
@@ -20,4 +21,41 @@ static inline void memregion_free(int id)
{
}
#endif
+
+/**
+ * cpu_cache_invalidate_memregion - drop any CPU cached data for
+ * memregions described by @res_desc
+ * @res_desc: one of the IORES_DESC_* types
+ *
+ * Perform cache maintenance after a memory event / operation that
+ * changes the contents of physical memory in a cache-incoherent manner.
+ * For example, device memory technologies like NVDIMM and CXL have
+ * device secure erase, and dynamic region provision that can replace
+ * the memory mapped to a given physical address.
+ *
+ * Limit the functionality to architectures that have an efficient way
+ * to write back and invalidate potentially terabytes of address space at
+ * once. Note that this routine may or may not write back any dirty
+ * contents while performing the invalidation. It is only exported for
+ * the explicit usage of the NVDIMM and CXL modules in the 'DEVMEM'
+ * symbol namespace on bare platforms.
+ *
+ * Returns 0 on success or negative error code on a failure to perform
+ * the cache maintenance.
+ */
+#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
+int cpu_cache_invalidate_memregion(int res_desc);
+bool cpu_cache_has_invalidate_memregion(void);
+#else
+static inline bool cpu_cache_has_invalidate_memregion(void)
+{
+ return false;
+}
+
+static inline int cpu_cache_invalidate_memregion(int res_desc)
+{
+ WARN_ON_ONCE("CPU cache invalidation required");
+ return -ENXIO;
+}
+#endif
#endif /* _MEMREGION_H_ */
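
The guarded call pattern the kernel-doc above describes looks roughly like the sketch below; the resource descriptor is just an example:

#include <linux/ioport.h>
#include <linux/memregion.h>

static int flush_after_secure_erase(void)
{
	if (!cpu_cache_has_invalidate_memregion())
		return -ENXIO;

	return cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
}
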
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 19010491a603..7fcaf3180a5b 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -139,6 +139,11 @@ struct dev_pagemap {
};
};
+static inline bool pgmap_has_memory_failure(struct dev_pagemap *pgmap)
+{
+ return pgmap->ops && pgmap->ops->memory_failure;
+}
+
static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
{
if (pgmap->flags & PGMAP_ALTMAP_VALID)
@@ -182,6 +187,7 @@ static inline bool folio_is_device_coherent(const struct folio *folio)
}
#ifdef CONFIG_ZONE_DEVICE
+void zone_device_page_init(struct page *page);
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
void memunmap_pages(struct dev_pagemap *pgmap);
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 6c98edcf4b0b..6193905abbb5 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -110,8 +110,6 @@ enum max8997_haptic_pwm_divisor {
/**
* max8997_haptic_platform_data
- * @pwm_channel_id: channel number of PWM device
- * valid for MAX8997_EXTERNAL_MODE
* @pwm_period: period in nano second for PWM device
* valid for MAX8997_EXTERNAL_MODE
* @type: motor type
@@ -128,7 +126,6 @@ enum max8997_haptic_pwm_divisor {
* [0 - 255]: available period
*/
struct max8997_haptic_platform_data {
- unsigned int pwm_channel_id;
unsigned int pwm_period;
enum max8997_haptic_motor_type type;
diff --git a/include/linux/mfd/ocelot.h b/include/linux/mfd/ocelot.h
new file mode 100644
index 000000000000..dd72073d2d4f
--- /dev/null
+++ b/include/linux/mfd/ocelot.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2022 Innovative Advantage Inc. */
+
+#ifndef _LINUX_MFD_OCELOT_H
+#define _LINUX_MFD_OCELOT_H
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/types.h>
+
+struct resource;
+
+static inline struct regmap *
+ocelot_regmap_from_resource_optional(struct platform_device *pdev,
+ unsigned int index,
+ const struct regmap_config *config)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ void __iomem *regs;
+
+ /*
+ * Don't use _get_and_ioremap_resource() here, since it would print
+ * "invalid resource" messages that only add confusion.
+ */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, index);
+ if (res) {
+ regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(regs))
+ return ERR_CAST(regs);
+ return devm_regmap_init_mmio(dev, regs, config);
+ }
+
+ /*
+ * Fall back to using REG and getting the resource from the parent
+ * device, which is possible in an MFD configuration.
+ */
+ if (dev->parent) {
+ res = platform_get_resource(pdev, IORESOURCE_REG, index);
+ if (!res)
+ return NULL;
+
+ return dev_get_regmap(dev->parent, res->name);
+ }
+
+ return NULL;
+}
+
+static inline struct regmap *
+ocelot_regmap_from_resource(struct platform_device *pdev, unsigned int index,
+ const struct regmap_config *config)
+{
+ struct regmap *map;
+
+ map = ocelot_regmap_from_resource_optional(pdev, index, config);
+ return map ?: ERR_PTR(-ENOENT);
+}
+
+#endif
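
A hedged sketch of a child-driver probe using the helper above, with a hypothetical regmap_config:

#include <linux/mfd/ocelot.h>

static const struct regmap_config example_regmap_config = {
	.reg_bits	= 32,
	.val_bits	= 32,
	.reg_stride	= 4,
};

static int example_probe(struct platform_device *pdev)
{
	struct regmap *map;

	/* MEM resource when standalone, parent regmap in the MFD case */
	map = ocelot_regmap_from_resource(pdev, 0, &example_regmap_config);
	if (IS_ERR(map))
		return PTR_ERR(map);

	platform_set_drvdata(pdev, map);
	return 0;
}
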
diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h
index 58602032e642..9af1f3105f80 100644
--- a/include/linux/mfd/rk808.h
+++ b/include/linux/mfd/rk808.h
@@ -519,6 +519,77 @@ enum rk809_reg_id {
#define MIC_DIFF_DIS (0x0 << 7)
#define MIC_DIFF_EN (0x1 << 7)
+/* RK817 Battery Registers */
+#define RK817_GAS_GAUGE_ADC_CONFIG0 0x50
+#define RK817_GG_EN (0x1 << 7)
+#define RK817_SYS_VOL_ADC_EN (0x1 << 6)
+#define RK817_TS_ADC_EN (0x1 << 5)
+#define RK817_USB_VOL_ADC_EN (0x1 << 4)
+#define RK817_BAT_VOL_ADC_EN (0x1 << 3)
+#define RK817_BAT_CUR_ADC_EN (0x1 << 2)
+
+#define RK817_GAS_GAUGE_ADC_CONFIG1 0x55
+
+#define RK817_VOL_CUR_CALIB_UPD BIT(7)
+
+#define RK817_GAS_GAUGE_GG_CON 0x56
+#define RK817_GAS_GAUGE_GG_STS 0x57
+
+#define RK817_BAT_CON (0x1 << 4)
+#define RK817_RELAX_VOL_UPD (0x3 << 2)
+#define RK817_RELAX_STS (0x1 << 1)
+
+#define RK817_GAS_GAUGE_RELAX_THRE_H 0x58
+#define RK817_GAS_GAUGE_RELAX_THRE_L 0x59
+#define RK817_GAS_GAUGE_OCV_THRE_VOL 0x62
+#define RK817_GAS_GAUGE_OCV_VOL_H 0x63
+#define RK817_GAS_GAUGE_OCV_VOL_L 0x64
+#define RK817_GAS_GAUGE_PWRON_VOL_H 0x6b
+#define RK817_GAS_GAUGE_PWRON_VOL_L 0x6c
+#define RK817_GAS_GAUGE_PWRON_CUR_H 0x6d
+#define RK817_GAS_GAUGE_PWRON_CUR_L 0x6e
+#define RK817_GAS_GAUGE_OFF_CNT 0x6f
+#define RK817_GAS_GAUGE_Q_INIT_H3 0x70
+#define RK817_GAS_GAUGE_Q_INIT_H2 0x71
+#define RK817_GAS_GAUGE_Q_INIT_L1 0x72
+#define RK817_GAS_GAUGE_Q_INIT_L0 0x73
+#define RK817_GAS_GAUGE_Q_PRES_H3 0x74
+#define RK817_GAS_GAUGE_Q_PRES_H2 0x75
+#define RK817_GAS_GAUGE_Q_PRES_L1 0x76
+#define RK817_GAS_GAUGE_Q_PRES_L0 0x77
+#define RK817_GAS_GAUGE_BAT_VOL_H 0x78
+#define RK817_GAS_GAUGE_BAT_VOL_L 0x79
+#define RK817_GAS_GAUGE_BAT_CUR_H 0x7a
+#define RK817_GAS_GAUGE_BAT_CUR_L 0x7b
+#define RK817_GAS_GAUGE_USB_VOL_H 0x7e
+#define RK817_GAS_GAUGE_USB_VOL_L 0x7f
+#define RK817_GAS_GAUGE_SYS_VOL_H 0x80
+#define RK817_GAS_GAUGE_SYS_VOL_L 0x81
+#define RK817_GAS_GAUGE_Q_MAX_H3 0x82
+#define RK817_GAS_GAUGE_Q_MAX_H2 0x83
+#define RK817_GAS_GAUGE_Q_MAX_L1 0x84
+#define RK817_GAS_GAUGE_Q_MAX_L0 0x85
+#define RK817_GAS_GAUGE_SLEEP_CON_SAMP_CUR_H 0x8f
+#define RK817_GAS_GAUGE_SLEEP_CON_SAMP_CUR_L 0x90
+#define RK817_GAS_GAUGE_CAL_OFFSET_H 0x91
+#define RK817_GAS_GAUGE_CAL_OFFSET_L 0x92
+#define RK817_GAS_GAUGE_VCALIB0_H 0x93
+#define RK817_GAS_GAUGE_VCALIB0_L 0x94
+#define RK817_GAS_GAUGE_VCALIB1_H 0x95
+#define RK817_GAS_GAUGE_VCALIB1_L 0x96
+#define RK817_GAS_GAUGE_IOFFSET_H 0x97
+#define RK817_GAS_GAUGE_IOFFSET_L 0x98
+#define RK817_GAS_GAUGE_BAT_R1 0x9a
+#define RK817_GAS_GAUGE_BAT_R2 0x9b
+#define RK817_GAS_GAUGE_BAT_R3 0x9c
+#define RK817_GAS_GAUGE_DATA0 0x9d
+#define RK817_GAS_GAUGE_DATA1 0x9e
+#define RK817_GAS_GAUGE_DATA2 0x9f
+#define RK817_GAS_GAUGE_DATA3 0xa0
+#define RK817_GAS_GAUGE_DATA4 0xa1
+#define RK817_GAS_GAUGE_DATA5 0xa2
+#define RK817_GAS_GAUGE_CUR_ADC_K0 0xb0
+
#define RK817_POWER_EN_REG(i) (0xb1 + (i))
#define RK817_POWER_SLP_EN_REG(i) (0xb5 + (i))
@@ -544,10 +615,30 @@ enum rk809_reg_id {
#define RK817_LDO_ON_VSEL_REG(idx) (0xcc + (idx) * 2)
#define RK817_BOOST_OTG_CFG (0xde)
+#define RK817_PMIC_CHRG_OUT 0xe4
+#define RK817_CHRG_VOL_SEL (0x07 << 4)
+#define RK817_CHRG_CUR_SEL (0x07 << 0)
+
+#define RK817_PMIC_CHRG_IN 0xe5
+#define RK817_USB_VLIM_EN (0x01 << 7)
+#define RK817_USB_VLIM_SEL (0x07 << 4)
+#define RK817_USB_ILIM_EN (0x01 << 3)
+#define RK817_USB_ILIM_SEL (0x07 << 0)
+#define RK817_PMIC_CHRG_TERM 0xe6
+#define RK817_CHRG_TERM_ANA_DIG (0x01 << 2)
+#define RK817_CHRG_TERM_ANA_SEL (0x03 << 0)
+#define RK817_CHRG_EN (0x01 << 6)
+
+#define RK817_PMIC_CHRG_STS 0xeb
+#define RK817_BAT_EXS BIT(7)
+#define RK817_CHG_STS (0x07 << 4)
+
#define RK817_ID_MSB 0xed
#define RK817_ID_LSB 0xee
#define RK817_SYS_STS 0xf0
+#define RK817_PLUG_IN_STS (0x1 << 6)
+
#define RK817_SYS_CFG(i) (0xf1 + (i))
#define RK817_ON_SOURCE_REG 0xf5
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 27264fe4b3b9..e8bf90281ba0 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -102,7 +102,6 @@ struct tmio_mmc_data {
unsigned long capabilities2;
unsigned long flags;
u32 ocr_mask; /* available voltages */
- int alignment_shift;
dma_addr_t dma_rx_offset;
unsigned int max_blk_count;
unsigned short max_segs;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 22c0a0cf5e0c..3ef77f52a4f0 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -62,6 +62,8 @@ extern const char *migrate_reason_names[MR_TYPES];
#ifdef CONFIG_MIGRATION
extern void putback_movable_pages(struct list_head *l);
+int migrate_folio_extra(struct address_space *mapping, struct folio *dst,
+ struct folio *src, enum migrate_mode mode, int extra_count);
int migrate_folio(struct address_space *mapping, struct folio *dst,
struct folio *src, enum migrate_mode mode);
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
@@ -100,21 +102,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
#endif /* CONFIG_MIGRATION */
-#if defined(CONFIG_MIGRATION) && defined(CONFIG_NUMA)
-extern void set_migration_target_nodes(void);
-extern void migrate_on_reclaim_init(void);
-extern bool numa_demotion_enabled;
-extern int next_demotion_node(int node);
-#else
-static inline void set_migration_target_nodes(void) {}
-static inline void migrate_on_reclaim_init(void) {}
-static inline int next_demotion_node(int node)
-{
- return NUMA_NO_NODE;
-}
-#define numa_demotion_enabled false
-#endif
-
#ifdef CONFIG_COMPACTION
bool PageMovable(struct page *page);
void __SetPageMovable(struct page *page, const struct movable_operations *ops);
@@ -212,11 +199,24 @@ struct migrate_vma {
*/
void *pgmap_owner;
unsigned long flags;
+
+ /*
+ * Set to vmf->page if this is being called to migrate a page as part of
+ * a migrate_to_ram() callback.
+ */
+ struct page *fault_page;
};
int migrate_vma_setup(struct migrate_vma *args);
void migrate_vma_pages(struct migrate_vma *migrate);
void migrate_vma_finalize(struct migrate_vma *migrate);
+int migrate_device_range(unsigned long *src_pfns, unsigned long start,
+ unsigned long npages);
+void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
+ unsigned long npages);
+void migrate_device_finalize(unsigned long *src_pfns,
+ unsigned long *dst_pfns, unsigned long npages);
+
#endif /* CONFIG_MIGRATION */
#endif /* _LINUX_MIGRATE_H */
diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index 5433c08fcc68..396df1121bff 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -37,6 +37,28 @@
__cmp(x, y, op), \
__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
+#define __clamp(val, lo, hi) \
+ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
+
+#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \
+ typeof(val) unique_val = (val); \
+ typeof(lo) unique_lo = (lo); \
+ typeof(hi) unique_hi = (hi); \
+ __clamp(unique_val, unique_lo, unique_hi); })
+
+#define __clamp_input_check(lo, hi) \
+ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
+ __is_constexpr((lo) > (hi)), (lo) > (hi), false)))
+
+#define __careful_clamp(val, lo, hi) ({ \
+ __clamp_input_check(lo, hi) + \
+ __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) && \
+ __typecheck(hi, lo) && __is_constexpr(val) && \
+ __is_constexpr(lo) && __is_constexpr(hi), \
+ __clamp(val, lo, hi), \
+ __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \
+ __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
+
/**
* min - return minimum of two values of the same or compatible types
* @x: first value
@@ -86,7 +108,7 @@
* This macro does strict typechecking of @lo/@hi to make sure they are of the
* same type as @val. See the unnecessary pointer comparisons.
*/
-#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+#define clamp(val, lo, hi) __careful_clamp(val, lo, hi)
/*
* ..and if you can't take the strict
@@ -121,7 +143,7 @@
* This macro does no typechecking and uses temporary variables of type
* @type to make all the comparisons.
*/
-#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi))
/**
* clamp_val - return a value clamped to a given range using val's type
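
With __careful_clamp() in place, constant bounds with lo > hi become a build failure instead of silently clamping to hi. A small sketch:

#include <linux/minmax.h>

static int sanitize(int val)
{
	int ok = clamp(val, 0, 100);	/* each argument evaluated once */

	/* clamp(val, 100, 0) would now trip __clamp_input_check() at
	 * compile time because the constant bounds are inverted. */
	return ok;
}
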
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index b5f58fd37a0f..5fe5d198b57a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -290,10 +290,9 @@ enum {
MLX5_UMR_INLINE = (1 << 7),
};
-#define MLX5_UMR_KLM_ALIGNMENT 4
-#define MLX5_UMR_MTT_ALIGNMENT 0x40
-#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1)
-#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
+#define MLX5_UMR_FLEX_ALIGNMENT 0x40
+#define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt))
+#define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm))
#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
@@ -325,6 +324,7 @@ enum mlx5_event {
MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
MLX5_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11,
MLX5_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12,
+ MLX5_EVENT_TYPE_OBJECT_CHANGE = 0x27,
MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x08,
MLX5_EVENT_TYPE_PORT_CHANGE = 0x09,
@@ -699,6 +699,12 @@ struct mlx5_eqe_temp_warning {
__be64 sensor_warning_lsb;
} __packed;
+struct mlx5_eqe_obj_change {
+ u8 rsvd0[2];
+ __be16 obj_type;
+ __be32 obj_id;
+} __packed;
+
#define SYNC_RST_STATE_MASK 0xf
enum sync_rst_state_type {
@@ -737,6 +743,7 @@ union ev_data {
struct mlx5_eqe_xrq_err xrq_err;
struct mlx5_eqe_sync_fw_update sync_fw_update;
struct mlx5_eqe_vhca_state vhca_state;
+ struct mlx5_eqe_obj_change obj_change;
} __packed;
struct mlx5_eqe {
@@ -890,11 +897,6 @@ static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe)
return (cqe->l4_l3_hdr_type >> 4) & 0x7;
}
-static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
-{
- return (cqe->l4_l3_hdr_type >> 2) & 0x3;
-}
-
static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe)
{
return cqe->tls_outer_l3_tunneled & 0x1;
@@ -1198,8 +1200,10 @@ enum mlx5_cap_type {
MLX5_CAP_DEV_EVENT = 0x14,
MLX5_CAP_IPSEC,
MLX5_CAP_DEV_SHAMPO = 0x1d,
+ MLX5_CAP_MACSEC = 0x1f,
MLX5_CAP_GENERAL_2 = 0x20,
MLX5_CAP_PORT_SELECTION = 0x25,
+ MLX5_CAP_ADV_VIRTUALIZATION = 0x26,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1365,6 +1369,14 @@ enum mlx5_qcam_feature_groups {
MLX5_GET(port_selection_cap, \
mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->max, cap)
+#define MLX5_CAP_ADV_VIRTUALIZATION(mdev, cap) \
+ MLX5_GET(adv_virtualization_cap, \
+ mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap)
+
+#define MLX5_CAP_ADV_VIRTUALIZATION_MAX(mdev, cap) \
+ MLX5_GET(adv_virtualization_cap, \
+ mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->max, cap)
+
#define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \
MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap)
@@ -1446,6 +1458,9 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\
MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap)
+#define MLX5_CAP_MACSEC(mdev, cap)\
+ MLX5_GET(macsec_cap, (mdev)->caps.hca[MLX5_CAP_MACSEC]->cur, cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 96b16fbe1aa4..d476255c9a3f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -606,8 +606,6 @@ struct mlx5_priv {
struct list_head pgdir_list;
/* end: alloc staff */
- struct list_head ctx_list;
- spinlock_t ctx_lock;
struct mlx5_adev **adev;
int adev_idx;
int sw_vhca_id;
@@ -698,6 +696,8 @@ struct mlx5_pps {
struct work_struct out_work;
u64 start[MAX_PIN_NUM];
u8 enabled;
+ u64 min_npps_period;
+ u64 min_out_pulse_duration_ns;
};
struct mlx5_timer {
@@ -779,6 +779,7 @@ struct mlx5_core_dev {
enum mlx5_device_state state;
/* sync interface state */
struct mutex intf_state_mutex;
+ struct lock_class_key lock_key;
unsigned long intf_state;
struct mlx5_priv priv;
struct mlx5_profile profile;
@@ -854,11 +855,6 @@ struct mlx5_cmd_work_ent {
refcount_t refcnt;
};
-struct mlx5_pas {
- u64 pa;
- u8 log_sz;
-};
-
enum phy_port_state {
MLX5_AAA_111
};
@@ -972,7 +968,7 @@ void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);
struct mlx5_async_ctx {
struct mlx5_core_dev *dev;
atomic_t num_inflight;
- struct wait_queue_head wait;
+ struct completion inflight_done;
};
struct mlx5_async_work;
@@ -983,6 +979,7 @@ struct mlx5_async_work {
struct mlx5_async_ctx *ctx;
mlx5_async_cbk_t user_callback;
u16 opcode; /* cmd opcode */
+ u16 op_mod; /* cmd op_mod */
void *out; /* pointer to the cmd output buffer */
};
@@ -1015,11 +1012,11 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
-void mlx5_health_flush(struct mlx5_core_dev *dev);
void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
+void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev);
void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
@@ -1084,8 +1081,6 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
struct mlx5_odp_caps *odp_caps);
-int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
- u8 port_num, void *out, size_t sz);
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
@@ -1152,6 +1147,7 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
@@ -1279,16 +1275,21 @@ enum {
MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
};
-static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev)
+bool mlx5_is_roce_on(struct mlx5_core_dev *dev);
+
+static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev)
{
- struct devlink *devlink = priv_to_devlink(dev);
- union devlink_param_value val;
- int err;
-
- err = devlink_param_driverinit_value_get(devlink,
- DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
- &val);
- return err ? MLX5_CAP_GEN(dev, roce) : val.vbool;
+ if (MLX5_CAP_GEN(dev, roce_rw_supported))
+ return MLX5_CAP_GEN(dev, roce);
+
+ /* If RoCE cap is read-only in FW, get RoCE state from devlink
+ * in order to support RoCE enable/disable feature
+ */
+ return mlx5_is_roce_on(dev);
}
+enum {
+ MLX5_OCTWORD = 16,
+};
+
#endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 8e73c377da2c..ba6958b49a8e 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -50,6 +50,7 @@ enum mlx5_flow_destination_type {
MLX5_FLOW_DESTINATION_TYPE_PORT,
MLX5_FLOW_DESTINATION_TYPE_COUNTER,
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM,
+ MLX5_FLOW_DESTINATION_TYPE_RANGE,
};
enum {
@@ -79,6 +80,7 @@ static inline void build_leftovers_ft_param(int *priority,
enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_BYPASS,
+ MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC,
MLX5_FLOW_NAMESPACE_LAG,
MLX5_FLOW_NAMESPACE_OFFLOADS,
MLX5_FLOW_NAMESPACE_ETHTOOL,
@@ -92,7 +94,8 @@ enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_SNIFFER_RX,
MLX5_FLOW_NAMESPACE_SNIFFER_TX,
MLX5_FLOW_NAMESPACE_EGRESS,
- MLX5_FLOW_NAMESPACE_EGRESS_KERNEL,
+ MLX5_FLOW_NAMESPACE_EGRESS_IPSEC,
+ MLX5_FLOW_NAMESPACE_EGRESS_MACSEC,
MLX5_FLOW_NAMESPACE_RDMA_RX,
MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
MLX5_FLOW_NAMESPACE_RDMA_TX,
@@ -141,6 +144,10 @@ enum {
MLX5_FLOW_DEST_VPORT_REFORMAT_ID = BIT(1),
};
+enum mlx5_flow_dest_range_field {
+ MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN = 0,
+};
+
struct mlx5_flow_destination {
enum mlx5_flow_destination_type type;
union {
@@ -154,6 +161,13 @@ struct mlx5_flow_destination {
struct mlx5_pkt_reformat *pkt_reformat;
u8 flags;
} vport;
+ struct {
+ struct mlx5_flow_table *hit_ft;
+ struct mlx5_flow_table *miss_ft;
+ enum mlx5_flow_dest_range_field field;
+ u32 min;
+ u32 max;
+ } range;
u32 sampler_id;
};
};
@@ -243,10 +257,10 @@ struct mlx5_flow_act {
u32 action;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_pkt_reformat *pkt_reformat;
- union {
- u32 ipsec_obj_id;
- uintptr_t esp_id;
- };
+ struct mlx5_flow_act_crypto_params {
+ u8 type;
+ u32 obj_id;
+ } crypto;
u32 flags;
struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
struct ib_counters *counters;
diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h
index 9db21cd0e92c..bc5125bc0561 100644
--- a/include/linux/mlx5/fs_helpers.h
+++ b/include/linux/mlx5/fs_helpers.h
@@ -38,46 +38,6 @@
#define MLX5_FS_IPV4_VERSION 4
#define MLX5_FS_IPV6_VERSION 6
-static inline bool mlx5_fs_is_ipsec_flow(const u32 *match_c)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters);
-
- return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
-}
-
-static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c,
- const u32 *match_v, u8 match)
-{
- const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
-
- return MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_protocol) == 0xff &&
- MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol) == match;
-}
-
-static inline bool mlx5_fs_is_outer_tcp_flow(const u32 *match_c,
- const u32 *match_v)
-{
- return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_TCP);
-}
-
-static inline bool mlx5_fs_is_outer_udp_flow(const u32 *match_c,
- const u32 *match_v)
-{
- return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_UDP);
-}
-
-static inline bool mlx5_fs_is_vxlan_flow(const u32 *match_c)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters);
-
- return MLX5_GET(fte_match_set_misc, misc_params_c, vxlan_vni);
-}
-
static inline bool _mlx5_fs_is_outer_ipv_flow(struct mlx5_core_dev *mdev,
const u32 *match_c,
const u32 *match_v, int version)
@@ -131,12 +91,4 @@ mlx5_fs_is_outer_ipv6_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
MLX5_FS_IPV6_VERSION);
}
-static inline bool mlx5_fs_is_outer_ipsec_flow(const u32 *match_c)
-{
- void *misc_params_c =
- MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
-
- return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
-}
-
#endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4acd5610e96b..152d2d7f8743 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -68,6 +68,8 @@ enum {
MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2,
MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20,
+ MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25,
};
enum {
@@ -82,6 +84,7 @@ enum {
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM),
MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11),
MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13),
+ MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39),
};
enum {
@@ -89,6 +92,7 @@ enum {
MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d,
MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS = 0x001c,
MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018,
+ MLX5_OBJ_TYPE_PAGE_TRACK = 0x46,
MLX5_OBJ_TYPE_MKEY = 0xff01,
MLX5_OBJ_TYPE_QP = 0xff02,
MLX5_OBJ_TYPE_PSV = 0xff03,
@@ -442,14 +446,22 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 max_modify_header_actions[0x8];
u8 max_ft_level[0x8];
- u8 reserved_at_40[0x6];
+ u8 reformat_add_esp_trasport[0x1];
+ u8 reserved_at_41[0x2];
+ u8 reformat_del_esp_trasport[0x1];
+ u8 reserved_at_44[0x2];
u8 execute_aso[0x1];
u8 reserved_at_47[0x19];
u8 reserved_at_60[0x2];
u8 reformat_insert[0x1];
u8 reformat_remove[0x1];
- u8 reserver_at_64[0x14];
+ u8 macsec_encrypt[0x1];
+ u8 macsec_decrypt[0x1];
+ u8 reserved_at_66[0x2];
+ u8 reformat_add_macsec[0x1];
+ u8 reformat_remove_macsec[0x1];
+ u8 reserved_at_6a[0xe];
u8 log_max_ft_num[0x8];
u8 reserved_at_80[0x10];
@@ -476,6 +488,22 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
u8 reserved_at_6[0x1a];
};
+struct mlx5_ifc_ipv4_layout_bits {
+ u8 reserved_at_0[0x60];
+
+ u8 ipv4[0x20];
+};
+
+struct mlx5_ifc_ipv6_layout_bits {
+ u8 ipv6[16][0x8];
+};
+
+union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
+ struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
+ struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
+ u8 reserved_at_0[0x80];
+};
+
struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 smac_47_16[0x20];
@@ -611,7 +639,13 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
u8 metadata_reg_a[0x20];
- u8 reserved_at_1a0[0x60];
+ u8 reserved_at_1a0[0x8];
+
+ u8 macsec_syndrome[0x8];
+ u8 ipsec_syndrome[0x8];
+ u8 reserved_at_1b8[0x8];
+
+ u8 reserved_at_1c0[0x40];
};
struct mlx5_ifc_fte_match_set_misc3_bits {
@@ -813,7 +847,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
struct mlx5_ifc_port_selection_cap_bits {
u8 reserved_at_0[0x10];
u8 port_select_flow_table[0x1];
- u8 reserved_at_11[0xf];
+ u8 reserved_at_11[0x1];
+ u8 port_select_flow_table_bypass[0x1];
+ u8 reserved_at_13[0xd];
u8 reserved_at_20[0x1e0];
@@ -1276,6 +1312,24 @@ struct mlx5_ifc_ipsec_cap_bits {
u8 reserved_at_30[0x7d0];
};
+struct mlx5_ifc_macsec_cap_bits {
+ u8 macsec_epn[0x1];
+ u8 reserved_at_1[0x2];
+ u8 macsec_crypto_esp_aes_gcm_256_encrypt[0x1];
+ u8 macsec_crypto_esp_aes_gcm_128_encrypt[0x1];
+ u8 macsec_crypto_esp_aes_gcm_256_decrypt[0x1];
+ u8 macsec_crypto_esp_aes_gcm_128_decrypt[0x1];
+ u8 reserved_at_7[0x4];
+ u8 log_max_macsec_offload[0x5];
+ u8 reserved_at_10[0x10];
+
+ u8 min_log_macsec_full_replay_window[0x8];
+ u8 max_log_macsec_full_replay_window[0x8];
+ u8 reserved_at_30[0x10];
+
+ u8 reserved_at_40[0x7c0];
+};
+
enum {
MLX5_WQ_TYPE_LINKED_LIST = 0x0,
MLX5_WQ_TYPE_CYCLIC = 0x1,
@@ -1443,7 +1497,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_120[0xa];
u8 log_max_ra_req_dc[0x6];
- u8 reserved_at_130[0x9];
+ u8 reserved_at_130[0x2];
+ u8 eth_wqe_too_small[0x1];
+ u8 reserved_at_133[0x6];
u8 vnic_env_cq_overrun[0x1];
u8 log_max_ra_res_dc[0x6];
@@ -1707,7 +1763,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 steering_format_version[0x4];
u8 create_qp_start_hint[0x18];
- u8 reserved_at_460[0x3];
+ u8 reserved_at_460[0x1];
+ u8 ats[0x1];
+ u8 reserved_at_462[0x1];
u8 log_max_uctx[0x5];
u8 reserved_at_468[0x2];
u8 ipsec_offload[0x1];
@@ -1733,7 +1791,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 max_geneve_tlv_options[0x8];
u8 reserved_at_568[0x3];
u8 max_geneve_tlv_option_data_len[0x5];
- u8 reserved_at_570[0x10];
+ u8 reserved_at_570[0x9];
+ u8 adv_virtualization[0x1];
+ u8 reserved_at_57a[0x6];
u8 reserved_at_580[0xb];
u8 log_max_dci_stream_channels[0x5];
@@ -1821,14 +1881,23 @@ struct mlx5_ifc_cmd_hca_cap_bits {
};
struct mlx5_ifc_cmd_hca_cap_2_bits {
- u8 reserved_at_0[0xa0];
+ u8 reserved_at_0[0x80];
+
+ u8 migratable[0x1];
+ u8 reserved_at_81[0x1f];
u8 max_reformat_insert_size[0x8];
u8 max_reformat_insert_offset[0x8];
u8 max_reformat_remove_size[0x8];
u8 max_reformat_remove_offset[0x8];
- u8 reserved_at_c0[0x160];
+ u8 reserved_at_c0[0xe0];
+
+ u8 reserved_at_1a0[0xb];
+ u8 log_min_mkey_entity_size[0x5];
+ u8 reserved_at_1b0[0x10];
+
+ u8 reserved_at_1c0[0x60];
u8 reserved_at_220[0x1];
u8 sw_vhca_id_valid[0x1];
@@ -3295,6 +3364,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap;
struct mlx5_ifc_shampo_cap_bits shampo_cap;
+ struct mlx5_ifc_macsec_cap_bits macsec_cap;
u8 reserved_at_0[0x8000];
};
@@ -3310,8 +3380,8 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100,
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 = 0x400,
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
- MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT = 0x1000,
- MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT = 0x2000,
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT = 0x1000,
+ MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT = 0x2000,
MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO = 0x4000,
};
@@ -3321,6 +3391,11 @@ enum {
MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT = 0x2,
};
+enum {
+ MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC = 0x0,
+ MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC = 0x1,
+};
+
struct mlx5_ifc_vlan_bits {
u8 ethtype[0x10];
u8 prio[0x3];
@@ -3374,7 +3449,7 @@ struct mlx5_ifc_flow_context_bits {
u8 extended_destination[0x1];
u8 reserved_at_81[0x1];
u8 flow_source[0x2];
- u8 reserved_at_84[0x4];
+ u8 encrypt_decrypt_type[0x4];
u8 destination_list_size[0x18];
u8 reserved_at_a0[0x8];
@@ -3386,7 +3461,7 @@ struct mlx5_ifc_flow_context_bits {
struct mlx5_ifc_vlan_bits push_vlan_2;
- u8 ipsec_obj_id[0x20];
+ u8 encrypt_decrypt_obj_id[0x20];
u8 reserved_at_140[0xc0];
struct mlx5_ifc_fte_match_param_bits match_value;
@@ -3475,7 +3550,9 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits {
u8 cq_overrun[0x20];
- u8 reserved_at_220[0xde0];
+ u8 eth_wqe_too_small[0x20];
+
+ u8 reserved_at_220[0xdc0];
};
struct mlx5_ifc_traffic_counter_bits {
@@ -3873,7 +3950,9 @@ struct mlx5_ifc_mkc_bits {
u8 lw[0x1];
u8 lr[0x1];
u8 access_mode_1_0[0x2];
- u8 reserved_at_18[0x8];
+ u8 reserved_at_18[0x2];
+ u8 ma_translation_mode[0x2];
+ u8 reserved_at_1c[0x4];
u8 qpn[0x18];
u8 mkey_7_0[0x8];
@@ -6034,6 +6113,38 @@ struct mlx5_ifc_match_definer_format_32_bits {
u8 inner_dmac_15_0[0x10];
};
+enum {
+ MLX5_IFC_DEFINER_FORMAT_ID_SELECT = 61,
+};
+
+#define MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED 0x0
+#define MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN 0x48
+#define MLX5_IFC_DEFINER_DW_SELECTORS_NUM 9
+#define MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM 8
+
+struct mlx5_ifc_match_definer_match_mask_bits {
+ u8 reserved_at_1c0[5][0x20];
+ u8 match_dw_8[0x20];
+ u8 match_dw_7[0x20];
+ u8 match_dw_6[0x20];
+ u8 match_dw_5[0x20];
+ u8 match_dw_4[0x20];
+ u8 match_dw_3[0x20];
+ u8 match_dw_2[0x20];
+ u8 match_dw_1[0x20];
+ u8 match_dw_0[0x20];
+
+ u8 match_byte_7[0x8];
+ u8 match_byte_6[0x8];
+ u8 match_byte_5[0x8];
+ u8 match_byte_4[0x8];
+
+ u8 match_byte_3[0x8];
+ u8 match_byte_2[0x8];
+ u8 match_byte_1[0x8];
+ u8 match_byte_0[0x8];
+};
+
struct mlx5_ifc_match_definer_bits {
u8 modify_field_select[0x40];
@@ -6042,9 +6153,41 @@ struct mlx5_ifc_match_definer_bits {
u8 reserved_at_80[0x10];
u8 format_id[0x10];
- u8 reserved_at_a0[0x160];
+ u8 reserved_at_a0[0x60];
+
+ u8 format_select_dw3[0x8];
+ u8 format_select_dw2[0x8];
+ u8 format_select_dw1[0x8];
+ u8 format_select_dw0[0x8];
+
+ u8 format_select_dw7[0x8];
+ u8 format_select_dw6[0x8];
+ u8 format_select_dw5[0x8];
+ u8 format_select_dw4[0x8];
- u8 match_mask[16][0x20];
+ u8 reserved_at_100[0x18];
+ u8 format_select_dw8[0x8];
+
+ u8 reserved_at_120[0x20];
+
+ u8 format_select_byte3[0x8];
+ u8 format_select_byte2[0x8];
+ u8 format_select_byte1[0x8];
+ u8 format_select_byte0[0x8];
+
+ u8 format_select_byte7[0x8];
+ u8 format_select_byte6[0x8];
+ u8 format_select_byte5[0x8];
+ u8 format_select_byte4[0x8];
+
+ u8 reserved_at_180[0x40];
+
+ union {
+ struct {
+ u8 match_mask[16][0x20];
+ };
+ struct mlx5_ifc_match_definer_match_mask_bits match_mask_format;
+ };
};
struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
@@ -6314,8 +6457,13 @@ enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5,
+ MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8,
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
+ MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11,
+ MLX5_REFORMAT_TYPE_DEL_MACSEC = 0x12,
};
struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
@@ -9789,7 +9937,9 @@ struct mlx5_ifc_pcam_reg_bits {
struct mlx5_ifc_mcam_enhanced_features_bits {
u8 reserved_at_0[0x5d];
u8 mcia_32dwords[0x1];
- u8 reserved_at_5e[0xc];
+ u8 out_pulse_duration_ns[0x1];
+ u8 npps_period[0x1];
+ u8 reserved_at_60[0xa];
u8 reset_state[0x1];
u8 ptpcyc2realtime_modify[0x1];
u8 reserved_at_6c[0x2];
@@ -10289,7 +10439,12 @@ struct mlx5_ifc_mtpps_reg_bits {
u8 reserved_at_18[0x4];
u8 cap_max_num_of_pps_out_pins[0x4];
- u8 reserved_at_20[0x24];
+ u8 reserved_at_20[0x13];
+ u8 cap_log_min_npps_period[0x5];
+ u8 reserved_at_38[0x3];
+ u8 cap_log_min_out_pulse_duration_ns[0x5];
+
+ u8 reserved_at_40[0x4];
u8 cap_pin_3_mode[0x4];
u8 reserved_at_48[0x4];
u8 cap_pin_2_mode[0x4];
@@ -10308,7 +10463,9 @@ struct mlx5_ifc_mtpps_reg_bits {
u8 cap_pin_4_mode[0x4];
u8 field_select[0x20];
- u8 reserved_at_a0[0x60];
+ u8 reserved_at_a0[0x20];
+
+ u8 npps_period[0x40];
u8 enable[0x1];
u8 reserved_at_101[0xb];
@@ -10317,7 +10474,8 @@ struct mlx5_ifc_mtpps_reg_bits {
u8 pin_mode[0x4];
u8 pin[0x8];
- u8 reserved_at_120[0x20];
+ u8 reserved_at_120[0x2];
+ u8 out_pulse_duration_ns[0x1e];
u8 time_stamp[0x40];
@@ -10920,7 +11078,9 @@ struct mlx5_ifc_lagc_bits {
u8 reserved_at_18[0x5];
u8 lag_state[0x3];
- u8 reserved_at_20[0x14];
+ u8 reserved_at_20[0xc];
+ u8 active_port[0x4];
+ u8 reserved_at_30[0x4];
u8 tx_remap_affinity_2[0x4];
u8 reserved_at_38[0x4];
u8 tx_remap_affinity_1[0x4];
@@ -11134,7 +11294,8 @@ struct mlx5_ifc_dealloc_memic_out_bits {
struct mlx5_ifc_umem_bits {
u8 reserved_at_0[0x80];
- u8 reserved_at_80[0x1b];
+ u8 ats[0x1];
+ u8 reserved_at_81[0x1a];
u8 log_page_size[0x5];
u8 page_offset[0x20];
@@ -11471,12 +11632,48 @@ enum {
MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13,
MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20,
MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24,
+ MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27,
};
enum {
MLX5_IPSEC_OBJECT_ICV_LEN_16B,
};
+enum {
+ MLX5_IPSEC_ASO_REG_C_0_1 = 0x0,
+ MLX5_IPSEC_ASO_REG_C_2_3 = 0x1,
+ MLX5_IPSEC_ASO_REG_C_4_5 = 0x2,
+ MLX5_IPSEC_ASO_REG_C_6_7 = 0x3,
+};
+
+enum {
+ MLX5_IPSEC_ASO_MODE = 0x0,
+ MLX5_IPSEC_ASO_REPLAY_PROTECTION = 0x1,
+ MLX5_IPSEC_ASO_INC_SN = 0x2,
+};
+
+struct mlx5_ifc_ipsec_aso_bits {
+ u8 valid[0x1];
+ u8 reserved_at_201[0x1];
+ u8 mode[0x2];
+ u8 window_sz[0x2];
+ u8 soft_lft_arm[0x1];
+ u8 hard_lft_arm[0x1];
+ u8 remove_flow_enable[0x1];
+ u8 esn_event_arm[0x1];
+ u8 reserved_at_20a[0x16];
+
+ u8 remove_flow_pkt_cnt[0x20];
+
+ u8 remove_flow_soft_lft[0x20];
+
+ u8 reserved_at_260[0x80];
+
+ u8 mode_parameter[0x20];
+
+ u8 replay_protection_window[0x100];
+};
+
struct mlx5_ifc_ipsec_obj_bits {
u8 modify_field_select[0x40];
u8 full_offload[0x1];
@@ -11498,7 +11695,11 @@ struct mlx5_ifc_ipsec_obj_bits {
u8 implicit_iv[0x40];
- u8 reserved_at_100[0x700];
+ u8 reserved_at_100[0x8];
+ u8 ipsec_aso_access_pd[0x18];
+ u8 reserved_at_120[0xe0];
+
+ struct mlx5_ifc_ipsec_aso_bits ipsec_aso;
};
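/*
 * Illustrative sketch, not part of the patch: filling the new ASO portion of
 * an IPsec object before creation.  "obj" points at an ipsec_obj layout; the
 * protection domain number and the window size encoding are assumed to be
 * supplied by the caller.
 */
static void example_init_ipsec_aso(void *obj, u32 pdn, u8 window_sz)
{
	void *aso = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso);

	MLX5_SET(ipsec_obj, obj, ipsec_aso_access_pd, pdn);
	MLX5_SET(ipsec_aso, aso, valid, 1);
	MLX5_SET(ipsec_aso, aso, mode, MLX5_IPSEC_ASO_REPLAY_PROTECTION);
	MLX5_SET(ipsec_aso, aso, window_sz, window_sz);
	MLX5_SET(ipsec_aso, aso, remove_flow_enable, 1);
}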
struct mlx5_ifc_create_ipsec_obj_in_bits {
@@ -11521,6 +11722,96 @@ struct mlx5_ifc_modify_ipsec_obj_in_bits {
struct mlx5_ifc_ipsec_obj_bits ipsec_object;
};
+enum {
+ MLX5_MACSEC_ASO_REPLAY_PROTECTION = 0x1,
+};
+
+enum {
+ MLX5_MACSEC_ASO_REPLAY_WIN_32BIT = 0x0,
+ MLX5_MACSEC_ASO_REPLAY_WIN_64BIT = 0x1,
+ MLX5_MACSEC_ASO_REPLAY_WIN_128BIT = 0x2,
+ MLX5_MACSEC_ASO_REPLAY_WIN_256BIT = 0x3,
+};
+
+#define MLX5_MACSEC_ASO_INC_SN 0x2
+#define MLX5_MACSEC_ASO_REG_C_4_5 0x2
+
+struct mlx5_ifc_macsec_aso_bits {
+ u8 valid[0x1];
+ u8 reserved_at_1[0x1];
+ u8 mode[0x2];
+ u8 window_size[0x2];
+ u8 soft_lifetime_arm[0x1];
+ u8 hard_lifetime_arm[0x1];
+ u8 remove_flow_enable[0x1];
+ u8 epn_event_arm[0x1];
+ u8 reserved_at_a[0x16];
+
+ u8 remove_flow_packet_count[0x20];
+
+ u8 remove_flow_soft_lifetime[0x20];
+
+ u8 reserved_at_60[0x80];
+
+ u8 mode_parameter[0x20];
+
+ u8 replay_protection_window[8][0x20];
+};
+
+struct mlx5_ifc_macsec_offload_obj_bits {
+ u8 modify_field_select[0x40];
+
+ u8 confidentiality_en[0x1];
+ u8 reserved_at_41[0x1];
+ u8 epn_en[0x1];
+ u8 epn_overlap[0x1];
+ u8 reserved_at_44[0x2];
+ u8 confidentiality_offset[0x2];
+ u8 reserved_at_48[0x4];
+ u8 aso_return_reg[0x4];
+ u8 reserved_at_50[0x10];
+
+ u8 epn_msb[0x20];
+
+ u8 reserved_at_80[0x8];
+ u8 dekn[0x18];
+
+ u8 reserved_at_a0[0x20];
+
+ u8 sci[0x40];
+
+ u8 reserved_at_100[0x8];
+ u8 macsec_aso_access_pd[0x18];
+
+ u8 reserved_at_120[0x60];
+
+ u8 salt[3][0x20];
+
+ u8 reserved_at_1e0[0x20];
+
+ struct mlx5_ifc_macsec_aso_bits macsec_aso;
+};
+
+struct mlx5_ifc_create_macsec_obj_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+ struct mlx5_ifc_macsec_offload_obj_bits macsec_object;
+};
+
+struct mlx5_ifc_modify_macsec_obj_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+ struct mlx5_ifc_macsec_offload_obj_bits macsec_object;
+};
+
+enum {
+ MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP = BIT(0),
+ MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB = BIT(1),
+};
+
+struct mlx5_ifc_query_macsec_obj_out_bits {
+ struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
+ struct mlx5_ifc_macsec_offload_obj_bits macsec_object;
+};
+
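/*
 * Illustrative sketch, not part of the patch: creating a MACsec offload
 * object through the general object command interface.  The command macros
 * (MLX5_ST_SZ_DW, MLX5_SET, MLX5_SET64, MLX5_GET, MLX5_ADDR_OF) and
 * mlx5_cmd_exec() follow existing mlx5 conventions; error handling beyond the
 * command status is omitted.
 */
static int example_create_macsec_obj(struct mlx5_core_dev *mdev, u64 sci,
				     u32 *obj_id)
{
	u32 in[MLX5_ST_SZ_DW(create_macsec_obj_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	void *obj = MLX5_ADDR_OF(create_macsec_obj_in, in, macsec_object);
	int err;

	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_MACSEC);
	MLX5_SET(macsec_offload_obj, obj, confidentiality_en, 1);
	MLX5_SET64(macsec_offload_obj, obj, sci, sci);

	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
	if (!err)
		*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
	return err;
}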
struct mlx5_ifc_encryption_key_obj_bits {
u8 modify_field_select[0x40];
@@ -11638,6 +11929,7 @@ enum {
enum {
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS = 0x1,
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC = 0x2,
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC = 0x4,
};
struct mlx5_ifc_tls_static_params_bits {
@@ -11818,4 +12110,82 @@ struct mlx5_ifc_load_vhca_state_out_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_adv_virtualization_cap_bits {
+ u8 reserved_at_0[0x3];
+ u8 pg_track_log_max_num[0x5];
+ u8 pg_track_max_num_range[0x8];
+ u8 pg_track_log_min_addr_space[0x8];
+ u8 pg_track_log_max_addr_space[0x8];
+
+ u8 reserved_at_20[0x3];
+ u8 pg_track_log_min_msg_size[0x5];
+ u8 reserved_at_28[0x3];
+ u8 pg_track_log_max_msg_size[0x5];
+ u8 reserved_at_30[0x3];
+ u8 pg_track_log_min_page_size[0x5];
+ u8 reserved_at_38[0x3];
+ u8 pg_track_log_max_page_size[0x5];
+
+ u8 reserved_at_40[0x7c0];
+};
+
+struct mlx5_ifc_page_track_report_entry_bits {
+ u8 dirty_address_high[0x20];
+
+ u8 dirty_address_low[0x20];
+};
+
+enum {
+ MLX5_PAGE_TRACK_STATE_TRACKING,
+ MLX5_PAGE_TRACK_STATE_REPORTING,
+ MLX5_PAGE_TRACK_STATE_ERROR,
+};
+
+struct mlx5_ifc_page_track_range_bits {
+ u8 start_address[0x40];
+
+ u8 length[0x40];
+};
+
+struct mlx5_ifc_page_track_bits {
+ u8 modify_field_select[0x40];
+
+ u8 reserved_at_40[0x10];
+ u8 vhca_id[0x10];
+
+ u8 reserved_at_60[0x20];
+
+ u8 state[0x4];
+ u8 track_type[0x4];
+ u8 log_addr_space_size[0x8];
+ u8 reserved_at_90[0x3];
+ u8 log_page_size[0x5];
+ u8 reserved_at_98[0x3];
+ u8 log_msg_size[0x5];
+
+ u8 reserved_at_a0[0x8];
+ u8 reporting_qpn[0x18];
+
+ u8 reserved_at_c0[0x18];
+ u8 num_ranges[0x8];
+
+ u8 reserved_at_e0[0x20];
+
+ u8 range_start_address[0x40];
+
+ u8 length[0x40];
+
+ struct mlx5_ifc_page_track_range_bits track_range[0];
+};
+
+struct mlx5_ifc_create_page_track_obj_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+ struct mlx5_ifc_page_track_bits obj_context;
+};
+
+struct mlx5_ifc_modify_page_track_obj_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+ struct mlx5_ifc_page_track_bits obj_context;
+};
+
#endif /* MLX5_IFC_H */
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 45c7c0d67635..0596472923ad 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -32,30 +32,6 @@
#ifndef MLX5_IFC_FPGA_H
#define MLX5_IFC_FPGA_H
-struct mlx5_ifc_ipv4_layout_bits {
- u8 reserved_at_0[0x60];
-
- u8 ipv4[0x20];
-};
-
-struct mlx5_ifc_ipv6_layout_bits {
- u8 ipv6[16][0x8];
-};
-
-union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
- struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
- struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
- u8 reserved_at_0[0x80];
-};
-
-enum {
- MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9,
-};
-
-enum {
- MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2,
-};
-
struct mlx5_ifc_fpga_shell_caps_bits {
u8 max_num_qps[0x10];
u8 reserved_at_10[0x8];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 8bda3ba5b109..4657d5c54abe 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -162,6 +162,8 @@ enum {
MLX5_SEND_WQE_MAX_WQEBBS = 16,
};
+#define MLX5_SEND_WQE_MAX_SIZE (MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQE_BB)
+
enum {
MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27,
MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28,
@@ -252,6 +254,7 @@ enum {
enum {
MLX5_ETH_WQE_FT_META_IPSEC = BIT(0),
+ MLX5_ETH_WQE_FT_META_MACSEC = BIT(1),
};
struct mlx5_wqe_eth_seg {
@@ -475,6 +478,12 @@ struct mlx5_klm {
__be64 va;
};
+struct mlx5_ksm {
+ __be32 reserved;
+ __be32 key;
+ __be64 va;
+};
+
struct mlx5_stride_block_entry {
__be16 stride;
__be16 bcount;
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index aad53cb72f17..7f31432f44c2 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -132,4 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev);
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out,
+ u16 opmod);
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3bedc449c14d..8178fe894e2e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -74,6 +74,7 @@ static inline void totalram_pages_add(long count)
extern void * high_memory;
extern int page_cluster;
+extern const int page_cluster_max;
#ifdef CONFIG_SYSCTL
extern int sysctl_legacy_va_layout;
@@ -549,7 +550,7 @@ struct vm_operations_struct {
/*
* Called by mprotect() to make driver-specific permission
* checks before mprotect() is finalised. The VMA must not
- * be modified. Returns 0 if eprotect() can proceed.
+ * be modified. Returns 0 if mprotect() can proceed.
*/
int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
unsigned long end, unsigned long newflags);
@@ -661,14 +662,48 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
return vma->vm_flags & VM_ACCESS_FLAGS;
}
+static inline
+struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
+{
+ return mas_find(&vmi->mas, max);
+}
+
+static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
+{
+ /*
+ * Uses vma_find() to get the first VMA when the iterator starts.
+ * Calling mas_next() could skip the first entry.
+ */
+ return vma_find(vmi, ULONG_MAX);
+}
+
+static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
+{
+ return mas_prev(&vmi->mas, 0);
+}
+
+static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
+{
+ return vmi->mas.index;
+}
+
+#define for_each_vma(__vmi, __vma) \
+ while (((__vma) = vma_next(&(__vmi))) != NULL)
+
+/* The MM code likes to work with exclusive end addresses */
+#define for_each_vma_range(__vmi, __vma, __end) \
+ while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL)
+
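/*
 * Illustrative sketch, not part of the patch: walking an address space with
 * the new iterator instead of the removed vma->vm_next list.  Assumes the
 * VMA_ITERATOR() initializer added elsewhere in this series and that the
 * caller already holds mmap_lock.
 */
static unsigned long example_count_vmas(struct mm_struct *mm)
{
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;
	unsigned long nr = 0;

	for_each_vma(vmi, vma)
		nr++;
	return nr;
}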
#ifdef CONFIG_SHMEM
/*
* The vma_is_shmem is not inline because it is used only by slow
* paths in userfault.
*/
bool vma_is_shmem(struct vm_area_struct *vma);
+bool vma_is_anon_shmem(struct vm_area_struct *vma);
#else
static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
#endif
int vma_is_stack_for_current(struct vm_area_struct *vma);
@@ -697,7 +732,9 @@ static inline unsigned int compound_order(struct page *page)
*/
static inline unsigned int folio_order(struct folio *folio)
{
- return compound_order(&folio->page);
+ if (!folio_test_large(folio))
+ return 0;
+ return folio->_folio_order;
}
#include <linux/huge_mm.h>
@@ -783,8 +820,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
/*
* How many times the entire folio is mapped as a single unit (eg by a
* PMD or PUD entry). This is probably not what you want, except for
- * debugging purposes; look at folio_mapcount() or page_mapcount()
- * instead.
+ * debugging purposes - it does not include PTE-mapped sub-pages; look
+ * at folio_mapcount() or page_mapcount() or total_mapcount() instead.
*/
static inline int folio_entire_mapcount(struct folio *folio)
{
@@ -794,12 +831,29 @@ static inline int folio_entire_mapcount(struct folio *folio)
/*
* Mapcount of compound page as a whole, does not include mapped sub-pages.
- *
- * Must be called only for compound pages.
+ * Must be called only on head of compound page.
+ */
+static inline int head_compound_mapcount(struct page *head)
+{
+ return atomic_read(compound_mapcount_ptr(head)) + 1;
+}
+
+/*
+ * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages,
+ * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit
+ * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently
+ * leaves subpages_mapcount at 0, but avoid surprise if it participates later.
*/
-static inline int compound_mapcount(struct page *page)
+#define COMPOUND_MAPPED 0x800000
+#define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1)
+
+/*
+ * Number of sub-pages mapped by PTE, does not include compound mapcount.
+ * Must be called only on head of compound page.
+ */
+static inline int head_subpages_mapcount(struct page *head)
{
- return folio_entire_mapcount(page_folio(page));
+ return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED;
}
/*
@@ -812,11 +866,9 @@ static inline void page_mapcount_reset(struct page *page)
atomic_set(&(page)->_mapcount, -1);
}
-int __page_mapcount(struct page *page);
-
/*
* Mapcount of 0-order page; when compound sub-page, includes
- * compound_mapcount().
+ * compound_mapcount of compound_head of page.
*
* Result is undefined for pages which cannot be mapped into userspace.
* For example SLAB or special types of pages. See function page_has_type().
@@ -824,25 +876,75 @@ int __page_mapcount(struct page *page);
*/
static inline int page_mapcount(struct page *page)
{
- if (unlikely(PageCompound(page)))
- return __page_mapcount(page);
- return atomic_read(&page->_mapcount) + 1;
+ int mapcount = atomic_read(&page->_mapcount) + 1;
+
+ if (likely(!PageCompound(page)))
+ return mapcount;
+ page = compound_head(page);
+ return head_compound_mapcount(page) + mapcount;
}
-int folio_mapcount(struct folio *folio);
+int total_compound_mapcount(struct page *head);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int total_mapcount(struct page *page)
+/**
+ * folio_mapcount() - Calculate the number of mappings of this folio.
+ * @folio: The folio.
+ *
+ * A large folio tracks both how many times the entire folio is mapped,
+ * and how many times each individual page in the folio is mapped.
+ * This function calculates the total number of times the folio is
+ * mapped.
+ *
+ * Return: The number of times this folio is mapped.
+ */
+static inline int folio_mapcount(struct folio *folio)
{
- return folio_mapcount(page_folio(page));
+ if (likely(!folio_test_large(folio)))
+ return atomic_read(&folio->_mapcount) + 1;
+ return total_compound_mapcount(&folio->page);
}
-#else
static inline int total_mapcount(struct page *page)
{
- return page_mapcount(page);
+ if (likely(!PageCompound(page)))
+ return atomic_read(&page->_mapcount) + 1;
+ return total_compound_mapcount(compound_head(page));
+}
+
+static inline bool folio_large_is_mapped(struct folio *folio)
+{
+ /*
+ * Reading folio_mapcount_ptr() below could be omitted if hugetlb
+ * participated in incrementing subpages_mapcount when compound mapped.
+ */
+ return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 ||
+ atomic_read(folio_mapcount_ptr(folio)) >= 0;
+}
+
+/**
+ * folio_mapped - Is this folio mapped into userspace?
+ * @folio: The folio.
+ *
+ * Return: True if any page in this folio is referenced by user page tables.
+ */
+static inline bool folio_mapped(struct folio *folio)
+{
+ if (likely(!folio_test_large(folio)))
+ return atomic_read(&folio->_mapcount) >= 0;
+ return folio_large_is_mapped(folio);
+}
+
+/*
+ * Return true if this page is mapped into pagetables.
+ * For compound page it returns true if any sub-page of compound page is mapped,
+ * even if this particular sub-page is not itself mapped by any PTE or PMD.
+ */
+static inline bool page_mapped(struct page *page)
+{
+ if (likely(!PageCompound(page)))
+ return atomic_read(&page->_mapcount) >= 0;
+ return folio_large_is_mapped(page_folio(page));
}
-#endif
static inline struct page *virt_to_head_page(const void *x)
{
@@ -895,6 +997,13 @@ static inline void set_compound_page_dtor(struct page *page,
page[1].compound_dtor = compound_dtor;
}
+static inline void folio_set_compound_dtor(struct folio *folio,
+ enum compound_dtor_id compound_dtor)
+{
+ VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio);
+ folio->_folio_dtor = compound_dtor;
+}
+
void destroy_large_folio(struct folio *folio);
static inline int head_compound_pincount(struct page *head)
@@ -910,6 +1019,22 @@ static inline void set_compound_order(struct page *page, unsigned int order)
#endif
}
+/*
+ * folio_set_compound_order is generally passed a non-zero order to
+ * initialize a large folio. However, hugetlb code abuses this by
+ * passing in zero when 'dissolving' a large folio.
+ */
+static inline void folio_set_compound_order(struct folio *folio,
+ unsigned int order)
+{
+ VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
+
+ folio->_folio_order = order;
+#ifdef CONFIG_64BIT
+ folio->_folio_nr_pages = order ? 1U << order : 0;
+#endif
+}
+
/* Returns the number of pages in this potentially compound page. */
static inline unsigned long compound_nr(struct page *page)
{
@@ -1095,7 +1220,7 @@ static inline void get_page(struct page *page)
folio_get(page_folio(page));
}
-bool __must_check try_grab_page(struct page *page, unsigned int flags);
+int __must_check try_grab_page(struct page *page, unsigned int flags);
static inline __must_check bool try_get_page(struct page *page)
{
@@ -1145,7 +1270,24 @@ static inline void folio_put_refs(struct folio *folio, int refs)
__folio_put(folio);
}
-void release_pages(struct page **pages, int nr);
+/**
+ * release_pages - release an array of pages or folios
+ *
+ * This just releases a simple array of multiple pages, and
+ * accepts various different forms of said page array: either
+ * a regular old boring array of pages, an array of folios, or
+ * an array of encoded page pointers.
+ *
+ * The transparent union syntax for this kind of "any of these
+ * argument types" is all kinds of ugly, so look away.
+ */
+typedef union {
+ struct page **pages;
+ struct folio **folios;
+ struct encoded_page **encoded_pages;
+} release_pages_arg __attribute__ ((__transparent_union__));
+
+void release_pages(release_pages_arg, int nr);
/**
* folios_put - Decrement the reference count on an array of folios.
@@ -1161,7 +1303,7 @@ void release_pages(struct page **pages, int nr);
*/
static inline void folios_put(struct folio **folios, unsigned int nr)
{
- release_pages((struct page **)folios, nr);
+ release_pages(folios, nr);
}
static inline void put_page(struct page *page)
@@ -1255,6 +1397,18 @@ static inline int folio_nid(const struct folio *folio)
}
#ifdef CONFIG_NUMA_BALANCING
+/* page access time bits need to hold at least 4 seconds */
+#define PAGE_ACCESS_TIME_MIN_BITS 12
+#if LAST_CPUPID_SHIFT < PAGE_ACCESS_TIME_MIN_BITS
+#define PAGE_ACCESS_TIME_BUCKETS \
+ (PAGE_ACCESS_TIME_MIN_BITS - LAST_CPUPID_SHIFT)
+#else
+#define PAGE_ACCESS_TIME_BUCKETS 0
+#endif
+
+#define PAGE_ACCESS_TIME_MASK \
+ (LAST_CPUPID_MASK << PAGE_ACCESS_TIME_BUCKETS)
+
static inline int cpu_pid_to_cpupid(int cpu, int pid)
{
return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
@@ -1318,12 +1472,25 @@ static inline void page_cpupid_reset_last(struct page *page)
page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT;
}
#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
+
+static inline int xchg_page_access_time(struct page *page, int time)
+{
+ int last_time;
+
+ last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS);
+ return last_time << PAGE_ACCESS_TIME_BUCKETS;
+}
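/*
 * Worked example, not part of the patch: PAGE_ACCESS_TIME_MIN_BITS of 12
 * spans 4096 units of the time value passed in (about 4 seconds when that
 * value is in milliseconds).  If LAST_CPUPID_SHIFT were 8, then
 * PAGE_ACCESS_TIME_BUCKETS would be 12 - 8 = 4, so xchg_page_access_time()
 * stores time >> 4 and each stored unit covers 16 of those units.
 */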
#else /* !CONFIG_NUMA_BALANCING */
static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
{
return page_to_nid(page); /* XXX */
}
+static inline int xchg_page_access_time(struct page *page, int time)
+{
+ return 0;
+}
+
static inline int page_cpupid_last(struct page *page)
{
return page_to_nid(page); /* XXX */
@@ -1465,6 +1632,11 @@ static inline unsigned long folio_pfn(struct folio *folio)
return page_to_pfn(&folio->page);
}
+static inline struct folio *pfn_folio(unsigned long pfn)
+{
+ return page_folio(pfn_to_page(pfn));
+}
+
static inline atomic_t *folio_pincount_ptr(struct folio *folio)
{
return &folio_page(folio, 1)->compound_pincount;
@@ -1544,9 +1716,16 @@ static inline bool is_longterm_pinnable_page(struct page *page)
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
#endif
- return !(is_device_coherent_page(page) ||
- is_zone_movable_page(page) ||
- is_zero_pfn(page_to_pfn(page)));
+ /* The zero page may always be pinned */
+ if (is_zero_pfn(page_to_pfn(page)))
+ return true;
+
+ /* Coherent device memory must always allow eviction. */
+ if (is_device_coherent_page(page))
+ return false;
+
+ /* Otherwise, non-movable zone pages can be pinned. */
+ return !is_zone_movable_page(page);
}
#else
static inline bool is_longterm_pinnable_page(struct page *page)
@@ -1590,7 +1769,13 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
*/
static inline long folio_nr_pages(struct folio *folio)
{
- return compound_nr(&folio->page);
+ if (!folio_test_large(folio))
+ return 1;
+#ifdef CONFIG_64BIT
+ return folio->_folio_nr_pages;
+#else
+ return 1L << folio->_folio_order;
+#endif
}
/**
@@ -1722,9 +1907,6 @@ static inline pgoff_t page_index(struct page *page)
return page->index;
}
-bool page_mapped(struct page *page);
-bool folio_mapped(struct folio *folio);
-
/*
* Return true only if the page has been allocated with
* ALLOC_NO_WATERMARKS and the low watermark was not
@@ -1769,7 +1951,30 @@ extern void pagefault_out_of_memory(void);
*/
#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
-extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
+extern void __show_free_areas(unsigned int flags, nodemask_t *nodemask, int max_zone_idx);
+static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodemask)
+{
+ __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1);
+}
+
+/*
+ * Parameter block passed down to zap_pte_range in exceptional cases.
+ */
+struct zap_details {
+ struct folio *single_folio; /* Locked folio to be unmapped */
+ bool even_cows; /* Zap COWed private pages too? */
+ zap_flags_t zap_flags; /* Extra flags for zapping */
+};
+
+/*
+ * Whether to drop the pte markers, for example, the uffd-wp information for
+ * file-backed memory. This should only be specified when we will completely
+ * drop the page in the mm, either by truncation or unmapping of the vma. By
+ * default, the flag is not set.
+ */
+#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
+/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
+#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
@@ -1788,8 +1993,11 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
-void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
- unsigned long start, unsigned long end);
+void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+ unsigned long size, struct zap_details *details);
+void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
+ struct vm_area_struct *start_vma, unsigned long start,
+ unsigned long end);
struct mmu_notifier_range;
@@ -1922,6 +2130,22 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
#define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \
MM_CP_UFFD_WP_RESOLVE)
+int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
+static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
+{
+ /*
+ * We want to check manually if we can change individual PTEs writable
+ * if we can't do that automatically for all PTEs in a mapping. For
+ * private mappings, that's always the case when we have write
+ * permissions as we properly have to handle COW.
+ */
+ if (vma->vm_flags & VM_SHARED)
+ return vma_wants_writenotify(vma, vma->vm_page_prot);
+ return !!(vma->vm_flags & VM_WRITE);
+
+}
+bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
+ pte_t pte);
extern unsigned long change_protection(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgprot_t newprot,
@@ -1948,40 +2172,30 @@ static inline bool get_user_page_fast_only(unsigned long addr,
*/
static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
{
- long val = atomic_long_read(&mm->rss_stat.count[member]);
-
-#ifdef SPLIT_RSS_COUNTING
- /*
- * counter is updated in asynchronous manner and may go to minus.
- * But it's never be expected number for users.
- */
- if (val < 0)
- val = 0;
-#endif
- return (unsigned long)val;
+ return percpu_counter_read_positive(&mm->rss_stat[member]);
}
-void mm_trace_rss_stat(struct mm_struct *mm, int member, long count);
+void mm_trace_rss_stat(struct mm_struct *mm, int member);
static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
{
- long count = atomic_long_add_return(value, &mm->rss_stat.count[member]);
+ percpu_counter_add(&mm->rss_stat[member], value);
- mm_trace_rss_stat(mm, member, count);
+ mm_trace_rss_stat(mm, member);
}
static inline void inc_mm_counter(struct mm_struct *mm, int member)
{
- long count = atomic_long_inc_return(&mm->rss_stat.count[member]);
+ percpu_counter_inc(&mm->rss_stat[member]);
- mm_trace_rss_stat(mm, member, count);
+ mm_trace_rss_stat(mm, member);
}
static inline void dec_mm_counter(struct mm_struct *mm, int member)
{
- long count = atomic_long_dec_return(&mm->rss_stat.count[member]);
+ percpu_counter_dec(&mm->rss_stat[member]);
- mm_trace_rss_stat(mm, member, count);
+ mm_trace_rss_stat(mm, member);
}
/* Optimized variant when page is already known not to be PageAnon */
@@ -2071,8 +2285,6 @@ static inline int pte_devmap(pte_t pte)
}
#endif
-int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
-
extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
spinlock_t **ptl);
static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -2321,7 +2533,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
#if USE_SPLIT_PMD_PTLOCKS
-static struct page *pmd_to_page(pmd_t *pmd)
+static inline struct page *pmd_pgtable_page(pmd_t *pmd)
{
unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
return virt_to_page((void *)((unsigned long) pmd & mask));
@@ -2329,7 +2541,7 @@ static struct page *pmd_to_page(pmd_t *pmd)
static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
- return ptlock_ptr(pmd_to_page(pmd));
+ return ptlock_ptr(pmd_pgtable_page(pmd));
}
static inline bool pmd_ptlock_init(struct page *page)
@@ -2348,7 +2560,7 @@ static inline void pmd_ptlock_free(struct page *page)
ptlock_free(page);
}
-#define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte)
+#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte)
#else
@@ -2488,7 +2700,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn);
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
-extern unsigned long find_min_pfn_with_active_regions(void);
#ifndef CONFIG_NUMA
static inline int early_pfn_to_nid(unsigned long pfn)
@@ -2509,7 +2720,12 @@ extern void calculate_min_free_kbytes(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);
-extern void show_mem(unsigned int flags, nodemask_t *nodemask);
+
+extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx);
+static inline void show_mem(unsigned int flags, nodemask_t *nodemask)
+{
+ __show_mem(flags, nodemask, MAX_NR_ZONES - 1);
+}
extern long si_mem_available(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
@@ -2586,14 +2802,15 @@ extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
extern int split_vma(struct mm_struct *, struct vm_area_struct *,
unsigned long addr, int new_below);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
-extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
- struct rb_node **, struct rb_node *);
extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
+void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas);
+
static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
unsigned long start,
@@ -2641,8 +2858,9 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
unsigned long pgoff, unsigned long *populate, struct list_head *uf);
-extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
- struct list_head *uf, bool downgrade);
+extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+ unsigned long start, size_t len, struct list_head *uf,
+ bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
@@ -2709,26 +2927,12 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add
extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
struct vm_area_struct **pprev);
-/**
- * find_vma_intersection() - Look up the first VMA which intersects the interval
- * @mm: The process address space.
- * @start_addr: The inclusive start user address.
- * @end_addr: The exclusive end user address.
- *
- * Returns: The first VMA within the provided range, %NULL otherwise. Assumes
- * start_addr < end_addr.
+/*
+ * Look up the first VMA which intersects the interval [start_addr, end_addr)
+ * NULL if none. Assume start_addr < end_addr.
*/
-static inline
struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
- unsigned long start_addr,
- unsigned long end_addr)
-{
- struct vm_area_struct *vma = find_vma(mm, start_addr);
-
- if (vma && end_addr <= vma->vm_start)
- vma = NULL;
- return vma;
-}
+ unsigned long start_addr, unsigned long end_addr);
/**
* vma_lookup() - Find a VMA at a specific address
@@ -2740,12 +2944,7 @@ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
static inline
struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
{
- struct vm_area_struct *vma = find_vma(mm, addr);
-
- if (vma && addr < vma->vm_start)
- vma = NULL;
-
- return vma;
+ return mtree_load(&mm->mm_mt, addr);
}
static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
@@ -2781,7 +2980,7 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
unsigned long vm_start, unsigned long vm_end)
{
- struct vm_area_struct *vma = find_vma(mm, vm_start);
+ struct vm_area_struct *vma = vma_lookup(mm, vm_start);
if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end))
vma = NULL;
@@ -2881,16 +3080,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
* and return without waiting upon it */
#define FOLL_NOFAULT 0x80 /* do not fault in pages */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
-#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
-#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
-#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */
#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */
+#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */
/*
* FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
@@ -2969,14 +3166,18 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
* PageAnonExclusive() has to protect against concurrent GUP:
* * Ordinary GUP: Using the PT lock
* * GUP-fast and fork(): mm->write_protect_seq
- * * GUP-fast and KSM or temporary unmapping (swap, migration):
- * clear/invalidate+flush of the page table entry
+ * * GUP-fast and KSM or temporary unmapping (swap, migration): see
+ * page_try_share_anon_rmap()
*
* Must be called with the (sub)page that's actually referenced via the
* page table entry, which might not necessarily be the head page for a
* PTE-mapped THP.
+ *
+ * If the vma is NULL, we're coming from the GUP-fast path and might have
+ * to fall back to the slow path just to look up the vma.
*/
-static inline bool gup_must_unshare(unsigned int flags, struct page *page)
+static inline bool gup_must_unshare(struct vm_area_struct *vma,
+ unsigned int flags, struct page *page)
{
/*
* FOLL_WRITE is implicitly handled correctly as the page table entry
@@ -2989,8 +3190,30 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page)
* Note: PageAnon(page) is stable until the page is actually getting
* freed.
*/
- if (!PageAnon(page))
- return false;
+ if (!PageAnon(page)) {
+ /*
+ * We only care about R/O long-term pinning: R/O short-term
+ * pinning does not have the semantics to observe successive
+ * changes through the process page tables.
+ */
+ if (!(flags & FOLL_LONGTERM))
+ return false;
+
+ /* We really need the vma ... */
+ if (!vma)
+ return true;
+
+ /*
+ * ... because we only care about writable private ("COW")
+ * mappings where we have to break COW early.
+ */
+ return is_cow_mapping(vma->vm_flags);
+ }
+
+ /* Paired with a memory barrier in page_try_share_anon_rmap(). */
+ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ smp_rmb();
+
/*
* Note that PageKsm() pages cannot be exclusive, and consequently,
* cannot get pinned.
@@ -2998,6 +3221,21 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page)
return !PageAnonExclusive(page);
}
+/*
+ * Indicates whether GUP can follow a PROT_NONE mapped page, or whether
+ * a (NUMA hinting) fault is required.
+ */
+static inline bool gup_can_follow_protnone(unsigned int flags)
+{
+ /*
+ * FOLL_FORCE has to be able to make progress even if the VMA is
+ * inaccessible. Further, FOLL_FORCE access usually does not represent
+ * application behaviour and we should avoid triggering NUMA hinting
+ * faults.
+ */
+ return flags & FOLL_FORCE;
+}
+
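/*
 * Illustrative sketch, not part of the patch: how a GUP page-table walker is
 * expected to consult the helper above when it finds a PROT_NONE (NUMA
 * hinting) entry.  pte_protnone() comes from the pgtable headers.
 */
static inline bool example_protnone_needs_fault(pte_t pte, unsigned int flags)
{
	/* Fault on NUMA-hinting PTEs unless FOLL_FORCE allows following them. */
	return pte_protnone(pte) && !gup_can_follow_protnone(flags);
}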
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
@@ -3005,7 +3243,7 @@ extern int apply_to_existing_page_range(struct mm_struct *mm,
unsigned long address, unsigned long size,
pte_fn_t fn, void *data);
-extern void init_mem_debugging_and_hardening(void);
+extern void __init init_mem_debugging_and_hardening(void);
#ifdef CONFIG_PAGE_POISONING
extern void __kernel_poison_pages(struct page *page, int numpages);
extern void __kernel_unpoison_pages(struct page *page, int numpages);
@@ -3146,6 +3384,8 @@ void *sparse_buffer_alloc(unsigned long size);
struct page * __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
+void pmd_init(void *addr);
+void pud_init(void *addr);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@ -3157,8 +3397,14 @@ struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node,
struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next);
+int vmemmap_check_pmd(pmd_t *pmd, int node,
+ unsigned long addr, unsigned long next);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
+int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
@@ -3181,7 +3427,6 @@ enum mf_flags {
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
unsigned long count, int mf_flags);
extern int memory_failure(unsigned long pfn, int flags);
-extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
@@ -3190,12 +3435,42 @@ extern void shake_page(struct page *p);
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
#ifdef CONFIG_MEMORY_FAILURE
-extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+extern void memory_failure_queue(unsigned long pfn, int flags);
+extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+ bool *migratable_cleared);
+void num_poisoned_pages_inc(unsigned long pfn);
+void num_poisoned_pages_sub(unsigned long pfn, long i);
#else
-static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+static inline void memory_failure_queue(unsigned long pfn, int flags)
+{
+}
+
+static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+ bool *migratable_cleared)
{
return 0;
}
+
+static inline void num_poisoned_pages_inc(unsigned long pfn)
+{
+}
+
+static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
+{
+}
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+extern void memblk_nr_poison_inc(unsigned long pfn);
+extern void memblk_nr_poison_sub(unsigned long pfn, long i);
+#else
+static inline void memblk_nr_poison_inc(unsigned long pfn)
+{
+}
+
+static inline void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+}
#endif
#ifndef arch_memory_failure
@@ -3380,12 +3655,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
}
#endif
-/*
- * Whether to drop the pte markers, for example, the uffd-wp information for
- * file-backed memory. This should only be specified when we will completely
- * drop the page in the mm, either by truncation or unmapping of the vma. By
- * default, the flag is not set.
- */
-#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
-
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 7b25b53c474a..e8ed225d8f7c 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -34,15 +34,25 @@ static inline int page_is_file_lru(struct page *page)
return folio_is_file_lru(page_folio(page));
}
-static __always_inline void update_lru_size(struct lruvec *lruvec,
+static __always_inline void __update_lru_size(struct lruvec *lruvec,
enum lru_list lru, enum zone_type zid,
long nr_pages)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ lockdep_assert_held(&lruvec->lru_lock);
+ WARN_ON_ONCE(nr_pages != (int)nr_pages);
+
__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
__mod_zone_page_state(&pgdat->node_zones[zid],
NR_ZONE_LRU_BASE + lru, nr_pages);
+}
+
+static __always_inline void update_lru_size(struct lruvec *lruvec,
+ enum lru_list lru, enum zone_type zid,
+ long nr_pages)
+{
+ __update_lru_size(lruvec, lru, zid, nr_pages);
#ifdef CONFIG_MEMCG
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
#endif
@@ -66,11 +76,6 @@ static __always_inline void __folio_clear_lru_flags(struct folio *folio)
__folio_clear_unevictable(folio);
}
-static __always_inline void __clear_page_lru_flags(struct page *page)
-{
- __folio_clear_lru_flags(page_folio(page));
-}
-
/**
* folio_lru_list - Which LRU list should a folio be on?
* @folio: The folio to test.
@@ -94,11 +99,224 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio)
return lru;
}
+#ifdef CONFIG_LRU_GEN
+
+#ifdef CONFIG_LRU_GEN_ENABLED
+static inline bool lru_gen_enabled(void)
+{
+ DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]);
+
+ return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]);
+}
+#else
+static inline bool lru_gen_enabled(void)
+{
+ DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]);
+
+ return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]);
+}
+#endif
+
+static inline bool lru_gen_in_fault(void)
+{
+ return current->in_lru_fault;
+}
+
+static inline int lru_gen_from_seq(unsigned long seq)
+{
+ return seq % MAX_NR_GENS;
+}
+
+static inline int lru_hist_from_seq(unsigned long seq)
+{
+ return seq % NR_HIST_GENS;
+}
+
+static inline int lru_tier_from_refs(int refs)
+{
+ VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH));
+
+ /* see the comment in folio_lru_refs() */
+ return order_base_2(refs + 1);
+}
+
+static inline int folio_lru_refs(struct folio *folio)
+{
+ unsigned long flags = READ_ONCE(folio->flags);
+ bool workingset = flags & BIT(PG_workingset);
+
+ /*
+ * Return the number of accesses beyond PG_referenced, i.e., N-1 if the
+ * total number of accesses is N>1, since N=0,1 both map to the first
+ * tier. lru_tier_from_refs() will account for this off-by-one. Also see
+ * the comment on MAX_NR_TIERS.
+ */
+ return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset;
+}
+
+static inline int folio_lru_gen(struct folio *folio)
+{
+ unsigned long flags = READ_ONCE(folio->flags);
+
+ return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
+
+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
+{
+ unsigned long max_seq = lruvec->lrugen.max_seq;
+
+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
+
+ /* see the comment on MIN_NR_GENS */
+ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
+}
+
+static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio,
+ int old_gen, int new_gen)
+{
+ int type = folio_is_file_lru(folio);
+ int zone = folio_zonenum(folio);
+ int delta = folio_nr_pages(folio);
+ enum lru_list lru = type * LRU_INACTIVE_FILE;
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+ VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
+ VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
+ VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1);
+
+ if (old_gen >= 0)
+ WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
+ lrugen->nr_pages[old_gen][type][zone] - delta);
+ if (new_gen >= 0)
+ WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone],
+ lrugen->nr_pages[new_gen][type][zone] + delta);
+
+ /* addition */
+ if (old_gen < 0) {
+ if (lru_gen_is_active(lruvec, new_gen))
+ lru += LRU_ACTIVE;
+ __update_lru_size(lruvec, lru, zone, delta);
+ return;
+ }
+
+ /* deletion */
+ if (new_gen < 0) {
+ if (lru_gen_is_active(lruvec, old_gen))
+ lru += LRU_ACTIVE;
+ __update_lru_size(lruvec, lru, zone, -delta);
+ return;
+ }
+
+ /* promotion */
+ if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) {
+ __update_lru_size(lruvec, lru, zone, -delta);
+ __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta);
+ }
+
+ /* demotion requires isolation, e.g., lru_deactivate_fn() */
+ VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
+}
+
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ unsigned long seq;
+ unsigned long flags;
+ int gen = folio_lru_gen(folio);
+ int type = folio_is_file_lru(folio);
+ int zone = folio_zonenum(folio);
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+ VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
+
+ if (folio_test_unevictable(folio) || !lrugen->enabled)
+ return false;
+ /*
+ * There are three common cases for this page:
+ * 1. If it's hot, e.g., freshly faulted in or previously hot and
+ * migrated, add it to the youngest generation.
+ * 2. If it's cold but can't be evicted immediately, i.e., an anon page
+ * not in swapcache or a dirty page pending writeback, add it to the
+ * second oldest generation.
+ * 3. Everything else (clean, cold) is added to the oldest generation.
+ */
+ if (folio_test_active(folio))
+ seq = lrugen->max_seq;
+ else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) ||
+ (folio_test_reclaim(folio) &&
+ (folio_test_dirty(folio) || folio_test_writeback(folio))))
+ seq = lrugen->min_seq[type] + 1;
+ else
+ seq = lrugen->min_seq[type];
+
+ gen = lru_gen_from_seq(seq);
+ flags = (gen + 1UL) << LRU_GEN_PGOFF;
+ /* see the comment on MIN_NR_GENS about PG_active */
+ set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
+
+ lru_gen_update_size(lruvec, folio, -1, gen);
+ /* for folio_rotate_reclaimable() */
+ if (reclaiming)
+ list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]);
+ else
+ list_add(&folio->lru, &lrugen->lists[gen][type][zone]);
+
+ return true;
+}
+
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ unsigned long flags;
+ int gen = folio_lru_gen(folio);
+
+ if (gen < 0)
+ return false;
+
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+
+ /* for folio_migrate_flags() */
+ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0;
+ flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags);
+ gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+
+ lru_gen_update_size(lruvec, folio, gen, -1);
+ list_del(&folio->lru);
+
+ return true;
+}
+
+#else /* !CONFIG_LRU_GEN */
+
+static inline bool lru_gen_enabled(void)
+{
+ return false;
+}
+
+static inline bool lru_gen_in_fault(void)
+{
+ return false;
+}
+
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ return false;
+}
+
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ return false;
+}
+
+#endif /* CONFIG_LRU_GEN */
+
static __always_inline
void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
{
enum lru_list lru = folio_lru_list(folio);
+ if (lru_gen_add_folio(lruvec, folio, false))
+ return;
+
update_lru_size(lruvec, lru, folio_zonenum(folio),
folio_nr_pages(folio));
if (lru != LRU_UNEVICTABLE)
@@ -116,23 +334,23 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
{
enum lru_list lru = folio_lru_list(folio);
+ if (lru_gen_add_folio(lruvec, folio, true))
+ return;
+
update_lru_size(lruvec, lru, folio_zonenum(folio),
folio_nr_pages(folio));
/* This is not expected to be used on LRU_UNEVICTABLE */
list_add_tail(&folio->lru, &lruvec->lists[lru]);
}
-static __always_inline void add_page_to_lru_list_tail(struct page *page,
- struct lruvec *lruvec)
-{
- lruvec_add_folio_tail(lruvec, page_folio(page));
-}
-
static __always_inline
void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
{
enum lru_list lru = folio_lru_list(folio);
+ if (lru_gen_del_folio(lruvec, folio, false))
+ return;
+
if (lru != LRU_UNEVICTABLE)
list_del(&folio->lru);
update_lru_size(lruvec, lru, folio_zonenum(folio),
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index cf97f3884fda..3b8475007734 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
+#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -17,6 +18,7 @@
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <linux/seqlock.h>
+#include <linux/percpu_counter.h>
#include <asm/mmu.h>
@@ -66,7 +68,7 @@ struct mem_cgroup;
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
#else
-#define _struct_page_alignment
+#define _struct_page_alignment __aligned(sizeof(unsigned long))
#endif
struct page {
@@ -102,7 +104,10 @@ struct page {
};
/* See page-flags.h for PAGE_MAPPING_FLAGS */
struct address_space *mapping;
- pgoff_t index; /* Our offset within mapping. */
+ union {
+ pgoff_t index; /* Our offset within mapping. */
+ unsigned long share; /* share count for fsdax */
+ };
/**
* @private: Mapping-private opaque data.
* Usually used for buffer_heads if PagePrivate.
@@ -140,17 +145,26 @@ struct page {
unsigned char compound_dtor;
unsigned char compound_order;
atomic_t compound_mapcount;
+ atomic_t subpages_mapcount;
atomic_t compound_pincount;
#ifdef CONFIG_64BIT
unsigned int compound_nr; /* 1 << compound_order */
#endif
};
- struct { /* Second tail page of compound page */
+ struct { /* Second tail page of transparent huge page */
unsigned long _compound_pad_1; /* compound_head */
unsigned long _compound_pad_2;
/* For both global and memcg */
struct list_head deferred_list;
};
+ struct { /* Second tail page of hugetlb page */
+ unsigned long _hugetlb_pad_1; /* compound_head */
+ void *hugetlb_subpool;
+ void *hugetlb_cgroup;
+ void *hugetlb_cgroup_rsvd;
+ void *hugetlb_hwpoison;
+ /* No more space on 32-bit: use third tail if more */
+ };
struct { /* Page table pages */
unsigned long _pt_pad_1; /* compound_head */
pgtable_t pmd_huge_pte; /* protected by page->ptl */
@@ -223,11 +237,55 @@ struct page {
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+#ifdef CONFIG_KMSAN
+ /*
+ * KMSAN metadata for this page:
+ * - shadow page: every bit indicates whether the corresponding
+ * bit of the original page is initialized (0) or not (1);
+ * - origin page: every 4 bytes contain an id of the stack trace
+ * where the uninitialized value was created.
+ */
+ struct page *kmsan_shadow;
+ struct page *kmsan_origin;
+#endif
+
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
} _struct_page_alignment;
+/*
+ * struct encoded_page - a nonexistent type marking this pointer
+ *
+ * An 'encoded_page' pointer is a pointer to a regular 'struct page', but
+ * with the low bits of the pointer indicating extra context-dependent
+ * information. Not super-common, but happens in mmu_gather and mlock
+ * handling, and this acts as a type system check on that use.
+ *
+ * We only really have two guaranteed bits in general, although you could
+ * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
+ * for more.
+ *
+ * Use the supplied helper functions to encode/decode the pointer and bits.
+ */
+struct encoded_page;
+#define ENCODE_PAGE_BITS 3ul
+static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags)
+{
+ BUILD_BUG_ON(flags > ENCODE_PAGE_BITS);
+ return (struct encoded_page *)(flags | (unsigned long)page);
+}
+
+static inline unsigned long encoded_page_flags(struct encoded_page *page)
+{
+ return ENCODE_PAGE_BITS & (unsigned long)page;
+}
+
+static inline struct page *encoded_page_ptr(struct encoded_page *page)
+{
+ return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page);
+}
+
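/*
 * Illustrative sketch, not part of the patch: the intended round trip for
 * the encoded-pointer helpers above.  The flag value is only an example.
 */
static inline void example_encoded_page_roundtrip(struct page *page)
{
	struct encoded_page *ep = encode_page(page, 1);

	WARN_ON_ONCE(encoded_page_ptr(ep) != page);
	WARN_ON_ONCE(encoded_page_flags(ep) != 1);
}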
/**
* struct folio - Represents a contiguous set of bytes.
* @flags: Identical to the page flags.
@@ -244,6 +302,20 @@ struct page {
* @_refcount: Do not access this member directly. Use folio_ref_count()
* to find how many references there are to this folio.
* @memcg_data: Memory Control Group data.
+ * @_flags_1: For large folios, additional page flags.
+ * @_head_1: Points to the folio. Do not use.
+ * @_folio_dtor: Which destructor to use for this folio.
+ * @_folio_order: Do not use directly, call folio_order().
+ * @_compound_mapcount: Do not use directly, call folio_entire_mapcount().
+ * @_subpages_mapcount: Do not use directly, call folio_mapcount().
+ * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
+ * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
+ * @_flags_2: For alignment. Do not use.
+ * @_head_2: Points to the folio. Do not use.
+ * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
+ * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
*
* A folio is a physically, virtually and logically contiguous set
* of bytes. It is a power-of-two in size, and it is aligned to that
@@ -282,9 +354,34 @@ struct folio {
};
struct page page;
};
+ union {
+ struct {
+ unsigned long _flags_1;
+ unsigned long _head_1;
+ unsigned char _folio_dtor;
+ unsigned char _folio_order;
+ atomic_t _compound_mapcount;
+ atomic_t _subpages_mapcount;
+ atomic_t _pincount;
+#ifdef CONFIG_64BIT
+ unsigned int _folio_nr_pages;
+#endif
+ };
+ struct page __page_1;
+ };
+ union {
+ struct {
+ unsigned long _flags_2;
+ unsigned long _head_2;
+ void *_hugetlb_subpool;
+ void *_hugetlb_cgroup;
+ void *_hugetlb_cgroup_rsvd;
+ void *_hugetlb_hwpoison;
+ };
+ struct page __page_2;
+ };
};
-static_assert(sizeof(struct page) == sizeof(struct folio));
#define FOLIO_MATCH(pg, fl) \
static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
FOLIO_MATCH(flags, flags);
@@ -299,6 +396,30 @@ FOLIO_MATCH(_refcount, _refcount);
FOLIO_MATCH(memcg_data, memcg_data);
#endif
#undef FOLIO_MATCH
+#define FOLIO_MATCH(pg, fl) \
+ static_assert(offsetof(struct folio, fl) == \
+ offsetof(struct page, pg) + sizeof(struct page))
+FOLIO_MATCH(flags, _flags_1);
+FOLIO_MATCH(compound_head, _head_1);
+FOLIO_MATCH(compound_dtor, _folio_dtor);
+FOLIO_MATCH(compound_order, _folio_order);
+FOLIO_MATCH(compound_mapcount, _compound_mapcount);
+FOLIO_MATCH(subpages_mapcount, _subpages_mapcount);
+FOLIO_MATCH(compound_pincount, _pincount);
+#ifdef CONFIG_64BIT
+FOLIO_MATCH(compound_nr, _folio_nr_pages);
+#endif
+#undef FOLIO_MATCH
+#define FOLIO_MATCH(pg, fl) \
+ static_assert(offsetof(struct folio, fl) == \
+ offsetof(struct page, pg) + 2 * sizeof(struct page))
+FOLIO_MATCH(flags, _flags_2);
+FOLIO_MATCH(compound_head, _head_2);
+FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool);
+FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup);
+FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd);
+FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison);
+#undef FOLIO_MATCH
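The FOLIO_MATCH assertions above pin each new folio field to the matching field in the second and third struct page of a compound page, so the overlapping views cannot silently drift apart. A minimal compile-checkable model of the technique (toy types, C11, not kernel code) could look like:

	#include <assert.h>
	#include <stddef.h>

	struct small {			/* plays the role of struct page */
		unsigned long flags;
		unsigned long head;
	};

	struct view {			/* plays the role of struct folio */
		unsigned long flags;	/* overlays small[0].flags */
		unsigned long head;	/* overlays small[0].head */
		unsigned long _flags_1;	/* must overlay small[1].flags */
		unsigned long _head_1;	/* must overlay small[1].head */
	};

	/* Fails to compile if a field is added or reordered on one side only. */
	#define VIEW_MATCH(sm, vw) \
		static_assert(offsetof(struct view, vw) == \
			      offsetof(struct small, sm) + sizeof(struct small), \
			      "layout drifted")

	VIEW_MATCH(flags, _flags_1);
	VIEW_MATCH(head, _head_1);

	int main(void) { return 0; }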
static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
{
@@ -306,11 +427,22 @@ static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
return &tail->compound_mapcount;
}
+static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio)
+{
+ struct page *tail = &folio->page + 1;
+ return &tail->subpages_mapcount;
+}
+
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
return &page[1].compound_mapcount;
}
+static inline atomic_t *subpages_mapcount_ptr(struct page *page)
+{
+ return &page[1].subpages_mapcount;
+}
+
static inline atomic_t *compound_pincount_ptr(struct page *page)
{
return &page[1].compound_pincount;
@@ -407,21 +539,6 @@ struct vm_area_struct {
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */
- /* linked list of VM areas per task, sorted by address */
- struct vm_area_struct *vm_next, *vm_prev;
-
- struct rb_node vm_rb;
-
- /*
- * Largest free memory gap in bytes to the left of this VMA.
- * Either between this VMA and vma->vm_prev, or between one of the
- * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
- * get_unmapped_area find a free area of the right size.
- */
- unsigned long rb_subtree_gap;
-
- /* Second cache line starts here. */
-
struct mm_struct *vm_mm; /* The address space we belong to. */
/*
@@ -435,21 +552,11 @@ struct vm_area_struct {
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*
- * For private anonymous mappings, a pointer to a null terminated string
- * containing the name given to the vma, or NULL if unnamed.
*/
-
- union {
- struct {
- struct rb_node rb;
- unsigned long rb_subtree_last;
- } shared;
- /*
- * Serialized by mmap_sem. Never use directly because it is
- * valid only when vm_file is NULL. Use anon_vma_name instead.
- */
- struct anon_vma_name *anon_name;
- };
+ struct {
+ struct rb_node rb;
+ unsigned long rb_subtree_last;
+ } shared;
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -470,6 +577,14 @@ struct vm_area_struct {
struct file * vm_file; /* File we map to (can be NULL). */
void * vm_private_data; /* was vm_pte (shared mem) */
+#ifdef CONFIG_ANON_VMA_NAME
+ /*
+ * For private and shared anonymous mappings, a pointer to a null
+ * terminated string containing the name given to the vma, or NULL if
+ * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
+ */
+ struct anon_vma_name *anon_name;
+#endif
#ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info;
#endif
@@ -485,9 +600,7 @@ struct vm_area_struct {
struct kioctx_table;
struct mm_struct {
struct {
- struct vm_area_struct *mmap; /* list of VMAs */
- struct rb_root mm_rb;
- u64 vmacache_seqnum; /* per-thread vmacache */
+ struct maple_tree mm_mt;
#ifdef CONFIG_MMU
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
@@ -501,7 +614,6 @@ struct mm_struct {
unsigned long mmap_compat_legacy_base;
#endif
unsigned long task_size; /* size of task vm space */
- unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
#ifdef CONFIG_MEMBARRIER
@@ -589,11 +701,7 @@ struct mm_struct {
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
- /*
- * Special counters, in some configurations protected by the
- * page_table_lock, in other configurations by being atomic.
- */
- struct mm_rss_stat rss_stat;
+ struct percpu_counter rss_stat[NR_MM_COUNTERS];
struct linux_binfmt *binfmt;
@@ -631,22 +739,22 @@ struct mm_struct {
#endif
#ifdef CONFIG_NUMA_BALANCING
/*
- * numa_next_scan is the next time that the PTEs will be marked
- * pte_numa. NUMA hinting faults will gather statistics and
- * migrate pages to new nodes if necessary.
+ * numa_next_scan is the next time that PTEs will be remapped
+ * PROT_NONE to trigger NUMA hinting faults; such faults gather
+ * statistics and migrate pages to new nodes if necessary.
*/
unsigned long numa_next_scan;
- /* Restart point for scanning and setting pte_numa */
+ /* Restart point for scanning and remapping PTEs. */
unsigned long numa_scan_offset;
- /* numa_scan_seq prevents two threads setting pte_numa */
+ /* numa_scan_seq prevents two threads remapping PTEs. */
int numa_scan_seq;
#endif
/*
* An operation with batched TLB flushing is going on. Anything
* that can move process memory needs to flush the TLB when
- * moving a PROT_NONE or PROT_NUMA mapped page.
+ * moving a PROT_NONE mapped page.
*/
atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
@@ -671,7 +779,28 @@ struct mm_struct {
* merging.
*/
unsigned long ksm_merging_pages;
+ /*
+ * Number of pages checked for KSM merging, including pages that
+ * were merged and pages that were not.
+ */
+ unsigned long ksm_rmap_items;
+#endif
+#ifdef CONFIG_LRU_GEN
+ struct {
+ /* this mm_struct is on lru_gen_mm_list */
+ struct list_head list;
+ /*
+ * Set when switching to this mm_struct, as a hint of
+ * whether it has been used since the last time per-node
+ * page table walkers cleared the corresponding bits.
+ */
+ unsigned long bitmap;
+#ifdef CONFIG_MEMCG
+ /* points to the memcg of "owner" above */
+ struct mem_cgroup *memcg;
#endif
+ } lru_gen;
+#endif /* CONFIG_LRU_GEN */
} __randomize_layout;
/*
@@ -681,6 +810,7 @@ struct mm_struct {
unsigned long cpu_bitmap[];
};
+#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
extern struct mm_struct init_mm;
/* Pointer magic because the dynamic array size confuses some compilers. */
@@ -698,6 +828,87 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return (struct cpumask *)&mm->cpu_bitmap;
}
+#ifdef CONFIG_LRU_GEN
+
+struct lru_gen_mm_list {
+ /* mm_struct list for page table walkers */
+ struct list_head fifo;
+ /* protects the list above */
+ spinlock_t lock;
+};
+
+void lru_gen_add_mm(struct mm_struct *mm);
+void lru_gen_del_mm(struct mm_struct *mm);
+#ifdef CONFIG_MEMCG
+void lru_gen_migrate_mm(struct mm_struct *mm);
+#endif
+
+static inline void lru_gen_init_mm(struct mm_struct *mm)
+{
+ INIT_LIST_HEAD(&mm->lru_gen.list);
+ mm->lru_gen.bitmap = 0;
+#ifdef CONFIG_MEMCG
+ mm->lru_gen.memcg = NULL;
+#endif
+}
+
+static inline void lru_gen_use_mm(struct mm_struct *mm)
+{
+ /*
+ * When the bitmap is set, page reclaim knows this mm_struct has been
+ * used since the last time it cleared the bitmap. So it might be worth
+ * walking the page tables of this mm_struct to clear the accessed bit.
+ */
+ WRITE_ONCE(mm->lru_gen.bitmap, -1);
+}
+
+#else /* !CONFIG_LRU_GEN */
+
+static inline void lru_gen_add_mm(struct mm_struct *mm)
+{
+}
+
+static inline void lru_gen_del_mm(struct mm_struct *mm)
+{
+}
+
+#ifdef CONFIG_MEMCG
+static inline void lru_gen_migrate_mm(struct mm_struct *mm)
+{
+}
+#endif
+
+static inline void lru_gen_init_mm(struct mm_struct *mm)
+{
+}
+
+static inline void lru_gen_use_mm(struct mm_struct *mm)
+{
+}
+
+#endif /* CONFIG_LRU_GEN */
+
+struct vma_iterator {
+ struct ma_state mas;
+};
+
+#define VMA_ITERATOR(name, __mm, __addr) \
+ struct vma_iterator name = { \
+ .mas = { \
+ .tree = &(__mm)->mm_mt, \
+ .index = __addr, \
+ .node = MAS_START, \
+ }, \
+ }
+
+static inline void vma_iter_init(struct vma_iterator *vmi,
+ struct mm_struct *mm, unsigned long addr)
+{
+ vmi->mas.tree = &mm->mm_mt;
+ vmi->mas.index = addr;
+ vmi->mas.node = MAS_START;
+}
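For illustration only (not part of this patch), a kernel-side walker built on the new iterator might look like the sketch below. It assumes the for_each_vma() helper that <linux/mm.h> layers on top of struct vma_iterator, and that the caller holds mmap_read_lock(mm).

	static unsigned long count_vmas(struct mm_struct *mm)
	{
		VMA_ITERATOR(vmi, mm, 0);	/* start the walk at address 0 */
		struct vm_area_struct *vma;
		unsigned long nr = 0;

		for_each_vma(vmi, vma)		/* yields VMAs in address order */
			nr++;

		return nr;
	}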
+
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
@@ -721,7 +932,6 @@ typedef __bitwise unsigned int vm_fault_t;
* @VM_FAULT_OOM: Out Of Memory
* @VM_FAULT_SIGBUS: Bad access
* @VM_FAULT_MAJOR: Page read from storage
- * @VM_FAULT_WRITE: Special case for get_user_pages
* @VM_FAULT_HWPOISON: Hit poisoned small page
* @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. Index encoded
* in upper bits
@@ -742,7 +952,6 @@ enum vm_fault_reason {
VM_FAULT_OOM = (__force vm_fault_t)0x000001,
VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002,
VM_FAULT_MAJOR = (__force vm_fault_t)0x000004,
- VM_FAULT_WRITE = (__force vm_fault_t)0x000008,
VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010,
VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020,
VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040,
@@ -768,7 +977,6 @@ enum vm_fault_reason {
{ VM_FAULT_OOM, "OOM" }, \
{ VM_FAULT_SIGBUS, "SIGBUS" }, \
{ VM_FAULT_MAJOR, "MAJOR" }, \
- { VM_FAULT_WRITE, "WRITE" }, \
{ VM_FAULT_HWPOISON, "HWPOISON" }, \
{ VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \
{ VM_FAULT_SIGSEGV, "SIGSEGV" }, \
@@ -831,9 +1039,9 @@ typedef struct {
* @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
* @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
* @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
- * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to unshare (and mark
- * exclusive) a possibly shared anonymous page that is
- * mapped R/O.
+ * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to break COW in a
+ * COW mapping, making sure that an exclusive anon page is
+ * mapped after the fault.
* @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
* We should only access orig_pte if this flag set.
*
@@ -858,7 +1066,7 @@ typedef struct {
*
* The combination FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE is illegal.
* FAULT_FLAG_UNSHARE is ignored and treated like an ordinary read fault when
- * no existing R/O-mapped anonymous page is encountered.
+ * applied to mappings that are not COW mappings.
*/
enum fault_flag {
FAULT_FLAG_WRITE = 1 << 0,
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index c1bc6731125c..5414b5c6a103 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -25,18 +25,6 @@
#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8)
/*
- * The per task VMA cache array:
- */
-#define VMACACHE_BITS 2
-#define VMACACHE_SIZE (1U << VMACACHE_BITS)
-#define VMACACHE_MASK (VMACACHE_SIZE - 1)
-
-struct vmacache {
- u64 seqnum;
- struct vm_area_struct *vmas[VMACACHE_SIZE];
-};
-
-/*
* When updating this, please also update struct resident_page_types[] in
* kernel/fork.c
*/
@@ -48,19 +36,6 @@ enum {
NR_MM_COUNTERS
};
-#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU)
-#define SPLIT_RSS_COUNTING
-/* per-thread cached information, */
-struct task_rss_stat {
- int events; /* for synchronization threshold */
- int count[NR_MM_COUNTERS];
-};
-#endif /* USE_SPLIT_PTE_PTLOCKS */
-
-struct mm_rss_stat {
- atomic_long_t count[NR_MM_COUNTERS];
-};
-
struct page_frag {
struct page *page;
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 8a30de08e913..c726ea781255 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -293,6 +293,7 @@ struct mmc_card {
#define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */
#define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */
#define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */
+#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */
bool reenable_cmdq; /* Re-enable Command Queue */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index eb8bc5b9b0b7..8fdd3cf971a3 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -476,7 +476,7 @@ struct mmc_host {
unsigned int sdio_irqs;
struct task_struct *sdio_irq_thread;
- struct delayed_work sdio_irq_work;
+ struct work_struct sdio_irq_work;
bool sdio_irq_pending;
atomic_t sdio_irq_thread_abort;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 9c50bc40f8ff..6f7993803ee7 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status)
#define MMC_SECURE_TRIM1_ARG 0x80000001
#define MMC_SECURE_TRIM2_ARG 0x80008000
#define MMC_SECURE_ARGS 0x80000000
-#define MMC_TRIM_ARGS 0x00008001
+#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003
#define mmc_driver_type_mask(n) (1 << (n))
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 53f0efa0bccf..74f9d9a6d330 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -74,6 +74,7 @@
#define SDIO_DEVICE_ID_BROADCOM_43362 0xa962
#define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4
#define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6
+#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43439 0xa9af
#define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf
#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752 0xaae8
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 15ae78cd2853..b8728d11c949 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm);
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
+#ifdef CONFIG_DEBUG_VM_IRQSOFF
+#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled())
+#else
+#define VM_WARN_ON_IRQS_ENABLED() do { } while (0)
+#endif
+
#ifdef CONFIG_DEBUG_VIRTUAL
#define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
#else
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e24b40c52468..cd28a100d9e4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -24,10 +24,10 @@
#include <asm/page.h>
/* Free memory management - zoned buddy allocator. */
-#ifndef CONFIG_FORCE_MAX_ZONEORDER
+#ifndef CONFIG_ARCH_FORCE_MAX_ORDER
#define MAX_ORDER 11
#else
-#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
+#define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
#endif
#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
@@ -121,20 +121,6 @@ static inline bool free_area_empty(struct free_area *area, int migratetype)
struct pglist_data;
-/*
- * Add a wild amount of padding here to ensure data fall into separate
- * cachelines. There are very few zone structures in the machine, so space
- * consumption is not a concern here.
- */
-#if defined(CONFIG_SMP)
-struct zone_padding {
- char x[0];
-} ____cacheline_internodealigned_in_smp;
-#define ZONE_PADDING(name) struct zone_padding name;
-#else
-#define ZONE_PADDING(name)
-#endif
-
#ifdef CONFIG_NUMA
enum numa_stat_item {
NUMA_HIT, /* allocated in intended node */
@@ -216,11 +202,13 @@ enum node_stat_item {
NR_KERNEL_SCS_KB, /* measured in KiB */
#endif
NR_PAGETABLE, /* used for pagetables */
+ NR_SECONDARY_PAGETABLE, /* secondary pagetables, e.g. KVM pagetables */
#ifdef CONFIG_SWAP
NR_SWAPCACHE,
#endif
#ifdef CONFIG_NUMA_BALANCING
PGPROMOTE_SUCCESS, /* promote successfully */
+ PGPROMOTE_CANDIDATE, /* candidate pages to promote */
#endif
NR_VM_NODE_STAT_ITEMS
};
@@ -306,6 +294,8 @@ static inline bool is_active_lru(enum lru_list lru)
return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}
+#define WORKINGSET_ANON 0
+#define WORKINGSET_FILE 1
#define ANON_AND_FILE 2
enum lruvec_flags {
@@ -314,6 +304,207 @@ enum lruvec_flags {
*/
};
+#endif /* !__GENERATING_BOUNDS_H */
+
+/*
+ * Evictable pages are divided into multiple generations. The youngest and the
+ * oldest generation numbers, max_seq and min_seq, are monotonically increasing.
+ * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
+ * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
+ * corresponding generation. The gen counter in folio->flags stores gen+1 while
+ * a page is on one of lrugen->lists[]. Otherwise it stores 0.
+ *
+ * A page is added to the youngest generation on faulting. The aging needs to
+ * check the accessed bit at least twice before handing this page over to the
+ * eviction. The first check takes care of the accessed bit set on the initial
+ * fault; the second check makes sure this page hasn't been used since then.
+ * This process, AKA second chance, requires a minimum of two generations,
+ * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive
+ * LRU, e.g., /proc/vmstat, these two generations are considered active; the
+ * rest of generations, if they exist, are considered inactive. See
+ * lru_gen_is_active().
+ *
+ * PG_active is always cleared while a page is on one of lrugen->lists[] so that
+ * the aging does not need to worry about it. It is set again when a page
+ * considered active is isolated for non-reclaiming purposes, e.g., migration.
+ * See lru_gen_add_folio() and lru_gen_del_folio().
+ *
+ * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the
+ * number of categories of the active/inactive LRU when keeping track of
+ * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits
+ * in folio->flags.
+ */
+#define MIN_NR_GENS 2U
+#define MAX_NR_GENS 4U
+
+/*
+ * Each generation is divided into multiple tiers. A page accessed N times
+ * through file descriptors is in tier order_base_2(N). A page in the first tier
+ * (N=0,1) is marked by PG_referenced unless it was faulted in through page
+ * tables or read ahead. A page in any other tier (N>1) is marked by
+ * PG_referenced and PG_workingset. This implies a minimum of two tiers is
+ * supported without using additional bits in folio->flags.
+ *
+ * In contrast to moving across generations which requires the LRU lock, moving
+ * across tiers only involves atomic operations on folio->flags and therefore
+ * has a negligible cost in the buffered access path. In the eviction path,
+ * comparisons of refaulted/(evicted+protected) from the first tier and the
+ * rest infer whether pages accessed multiple times through file descriptors
+ * are statistically hot and thus worth protecting.
+ *
+ * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the
+ * number of categories of the active/inactive LRU when keeping track of
+ * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in
+ * folio->flags.
+ */
+#define MAX_NR_TIERS 4U
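As a concrete illustration of the two comment blocks above, the standalone userspace sketch below models how a sequence number selects a slot within the sliding window of generations and how an access count maps to a tier via order_base_2(). The helper names are invented; this is not kernel code.

	#include <stdio.h>

	#define MAX_NR_GENS	4U
	#define MAX_NR_TIERS	4U

	/* A sequence number indexes its LRU list modulo the window size. */
	static unsigned int seq_to_gen(unsigned long seq)
	{
		return seq % MAX_NR_GENS;
	}

	/* order_base_2(N): tier 0 for 0-1 accesses, 1 for 2, 2 for 3-4, ... */
	static unsigned int accesses_to_tier(unsigned long n)
	{
		unsigned int tier = 0;

		while ((1UL << tier) < n && tier < MAX_NR_TIERS - 1)
			tier++;
		return tier;
	}

	int main(void)
	{
		printf("seq 7 -> gen %u\n", seq_to_gen(7));		/* prints 3 */
		printf("3 accesses -> tier %u\n", accesses_to_tier(3));	/* prints 2 */
		return 0;
	}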
+
+#ifndef __GENERATING_BOUNDS_H
+
+struct lruvec;
+struct page_vma_mapped_walk;
+
+#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
+#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
+
+#ifdef CONFIG_LRU_GEN
+
+enum {
+ LRU_GEN_ANON,
+ LRU_GEN_FILE,
+};
+
+enum {
+ LRU_GEN_CORE,
+ LRU_GEN_MM_WALK,
+ LRU_GEN_NONLEAF_YOUNG,
+ NR_LRU_GEN_CAPS
+};
+
+#define MIN_LRU_BATCH BITS_PER_LONG
+#define MAX_LRU_BATCH (MIN_LRU_BATCH * 64)
+
+/* whether to keep historical stats from evicted generations */
+#ifdef CONFIG_LRU_GEN_STATS
+#define NR_HIST_GENS MAX_NR_GENS
+#else
+#define NR_HIST_GENS 1U
+#endif
+
+/*
+ * The youngest generation number is stored in max_seq for both anon and file
+ * types as they are aged on an equal footing. The oldest generation numbers are
+ * stored in min_seq[] separately for anon and file types as clean file pages
+ * can be evicted regardless of swap constraints.
+ *
+ * Normally anon and file min_seq are in sync. But if swapping is constrained,
+ * e.g., out of swap space, file min_seq is allowed to advance and leave anon
+ * min_seq behind.
+ *
+ * The number of pages in each generation is eventually consistent and therefore
+ * can be transiently negative when reset_batch_size() is pending.
+ */
+struct lru_gen_struct {
+ /* the aging increments the youngest generation number */
+ unsigned long max_seq;
+ /* the eviction increments the oldest generation numbers */
+ unsigned long min_seq[ANON_AND_FILE];
+ /* the birth time of each generation in jiffies */
+ unsigned long timestamps[MAX_NR_GENS];
+ /* the multi-gen LRU lists, lazily sorted on eviction */
+ struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ /* the multi-gen LRU sizes, eventually consistent */
+ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ /* the exponential moving average of refaulted */
+ unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
+ /* the exponential moving average of evicted+protected */
+ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
+ /* the first tier doesn't need protection, hence the minus one */
+ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
+ /* can be modified without holding the LRU lock */
+ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
+ atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
+ /* whether the multi-gen LRU is enabled */
+ bool enabled;
+};
+
+enum {
+ MM_LEAF_TOTAL, /* total leaf entries */
+ MM_LEAF_OLD, /* old leaf entries */
+ MM_LEAF_YOUNG, /* young leaf entries */
+ MM_NONLEAF_TOTAL, /* total non-leaf entries */
+ MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */
+ MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */
+ NR_MM_STATS
+};
+
+/* double-buffering Bloom filters */
+#define NR_BLOOM_FILTERS 2
+
+struct lru_gen_mm_state {
+ /* set to max_seq after each iteration */
+ unsigned long seq;
+ /* where the current iteration continues (inclusive) */
+ struct list_head *head;
+ /* where the last iteration ended (exclusive) */
+ struct list_head *tail;
+ /* to wait for the last page table walker to finish */
+ struct wait_queue_head wait;
+ /* Bloom filters flip after each iteration */
+ unsigned long *filters[NR_BLOOM_FILTERS];
+ /* the mm stats for debugging */
+ unsigned long stats[NR_HIST_GENS][NR_MM_STATS];
+ /* the number of concurrent page table walkers */
+ int nr_walkers;
+};
+
+struct lru_gen_mm_walk {
+ /* the lruvec under reclaim */
+ struct lruvec *lruvec;
+ /* unstable max_seq from lru_gen_struct */
+ unsigned long max_seq;
+ /* the next address within an mm to scan */
+ unsigned long next_addr;
+ /* to batch promoted pages */
+ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ /* to batch the mm stats */
+ int mm_stats[NR_MM_STATS];
+ /* total batched items */
+ int batched;
+ bool can_swap;
+ bool force_scan;
+};
+
+void lru_gen_init_lruvec(struct lruvec *lruvec);
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+
+#ifdef CONFIG_MEMCG
+void lru_gen_init_memcg(struct mem_cgroup *memcg);
+void lru_gen_exit_memcg(struct mem_cgroup *memcg);
+#endif
+
+#else /* !CONFIG_LRU_GEN */
+
+static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
+{
+}
+
+static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+{
+}
+
+#ifdef CONFIG_MEMCG
+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
+{
+}
+#endif
+
+#endif /* CONFIG_LRU_GEN */
+
struct lruvec {
struct list_head lists[NR_LRU_LISTS];
/* per lruvec lru_lock for memcg */
@@ -331,6 +522,12 @@ struct lruvec {
unsigned long refaults[ANON_AND_FILE];
/* Various lruvec state flags (enum lruvec_flags) */
unsigned long flags;
+#ifdef CONFIG_LRU_GEN
+ /* evictable pages divided into generations */
+ struct lru_gen_struct lrugen;
+ /* to concurrently iterate lru_gen_mm_list */
+ struct lru_gen_mm_state mm_state;
+#endif
#ifdef CONFIG_MEMCG
struct pglist_data *pgdat;
#endif
@@ -368,13 +565,6 @@ enum zone_watermarks {
#define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
#define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP)
-/*
- * Shift to encode migratetype and order in the same integer, with order
- * in the least significant bits.
- */
-#define NR_PCP_ORDER_WIDTH 8
-#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
-
#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
@@ -627,7 +817,7 @@ struct zone {
int initialized;
/* Write-intensive fields used from the page allocator */
- ZONE_PADDING(_pad1_)
+ CACHELINE_PADDING(_pad1_);
/* free areas of different sizes */
struct free_area free_area[MAX_ORDER];
@@ -639,7 +829,7 @@ struct zone {
spinlock_t lock;
/* Write-intensive fields used by compaction and vmstats. */
- ZONE_PADDING(_pad2_)
+ CACHELINE_PADDING(_pad2_);
/*
* When free pages are below this point, additional steps are taken
@@ -676,7 +866,7 @@ struct zone {
bool contiguous;
- ZONE_PADDING(_pad3_)
+ CACHELINE_PADDING(_pad3_);
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
@@ -746,6 +936,8 @@ static inline bool zone_is_empty(struct zone *zone)
#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
+#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
+#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH)
/*
* Define the bit shifts to access each section. For non-existent
@@ -794,6 +986,25 @@ static inline bool is_zone_device_page(const struct page *page)
{
return page_zonenum(page) == ZONE_DEVICE;
}
+
+/*
+ * Consecutive zone device pages should not be merged into the same sgl
+ * or bvec segment with other types of pages or if they belong to different
+ * pgmaps. Otherwise getting the pgmap of a given segment is not possible
+ * without scanning the entire segment. This helper returns true if either
+ * both pages are not zone device pages or both are zone device pages with
+ * the same pgmap.
+ */
+static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
+ const struct page *b)
+{
+ if (is_zone_device_page(a) != is_zone_device_page(b))
+ return false;
+ if (!is_zone_device_page(a))
+ return true;
+ return a->pgmap == b->pgmap;
+}
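A hypothetical caller on a segment-merge path might use the helper like this (pages_mergeable() is an invented name; illustrative only):

	static inline bool pages_mergeable(const struct page *a, const struct page *b)
	{
		/* Refuse to mix device and normal pages, or pages from different pgmaps. */
		return zone_device_pages_have_same_pgmap(a, b);
	}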
+
extern void memmap_init_zone_device(struct zone *, unsigned long,
unsigned long, struct dev_pagemap *);
#else
@@ -801,6 +1012,11 @@ static inline bool is_zone_device_page(const struct page *page)
{
return false;
}
+static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
+ const struct page *b)
+{
+ return true;
+}
#endif
static inline bool folio_is_zone_device(const struct folio *folio)
@@ -953,8 +1169,10 @@ typedef struct pglist_data {
atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */
unsigned long nr_reclaim_start; /* nr pages written while throttled
* when throttling started. */
- struct task_struct *kswapd; /* Protected by
- mem_hotplug_begin/done() */
+#ifdef CONFIG_MEMORY_HOTPLUG
+ struct mutex kswapd_lock;
+#endif
+ struct task_struct *kswapd; /* Protected by kswapd_lock */
int kswapd_order;
enum zone_type kswapd_highest_zoneidx;
@@ -982,7 +1200,7 @@ typedef struct pglist_data {
#endif /* CONFIG_NUMA */
/* Write-intensive fields used by page reclaim */
- ZONE_PADDING(_pad1_)
+ CACHELINE_PADDING(_pad1_);
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
@@ -996,6 +1214,21 @@ typedef struct pglist_data {
struct deferred_split deferred_split_queue;
#endif
+#ifdef CONFIG_NUMA_BALANCING
+ /* start time in ms of current promote rate limit period */
+ unsigned int nbp_rl_start;
+ /* number of promote candidate pages at start time of current rate limit period */
+ unsigned long nbp_rl_nr_cand;
+ /* promote threshold in ms */
+ unsigned int nbp_threshold;
+ /* start time in ms of current promote threshold adjustment period */
+ unsigned int nbp_th_start;
+ /*
+ * number of promote candidate pages at start time of current promote
+ * threshold adjustment period
+ */
+ unsigned long nbp_th_nr_cand;
+#endif
/* Fields commonly accessed by the page reclaim scanner */
/*
@@ -1007,11 +1240,19 @@ typedef struct pglist_data {
unsigned long flags;
- ZONE_PADDING(_pad2_)
+#ifdef CONFIG_LRU_GEN
+ /* kswap mm walk data */
+ struct lru_gen_mm_walk mm_walk;
+#endif
+
+ CACHELINE_PADDING(_pad2_);
/* Per-node vmstats */
struct per_cpu_nodestat __percpu *per_cpu_nodestats;
atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
+#ifdef CONFIG_NUMA
+ struct memory_tier __rcu *memtier;
+#endif
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
@@ -1025,11 +1266,6 @@ static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
return pgdat->node_start_pfn + pgdat->node_spanned_pages;
}
-static inline bool pgdat_is_empty(pg_data_t *pgdat)
-{
- return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
-}
-
#include <linux/memory_hotplug.h>
void build_all_zonelists(pg_data_t *pgdat);
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index f6e5369d2928..092c52aa6c2c 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -5,12 +5,10 @@
#include <linux/types.h>
#include <linux/uidgid.h>
+struct mnt_idmap;
struct user_namespace;
-/*
- * Carries the initial idmapping of 0:0:4294967295 which is an identity
- * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
- * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
- */
+
+extern struct mnt_idmap nop_mnt_idmap;
extern struct user_namespace init_user_ns;
typedef struct {
@@ -98,6 +96,26 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid)
return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid);
}
+static inline bool vfsuid_gt_kuid(vfsuid_t vfsuid, kuid_t kuid)
+{
+ return __vfsuid_val(vfsuid) > __kuid_val(kuid);
+}
+
+static inline bool vfsgid_gt_kgid(vfsgid_t vfsgid, kgid_t kgid)
+{
+ return __vfsgid_val(vfsgid) > __kgid_val(kgid);
+}
+
+static inline bool vfsuid_lt_kuid(vfsuid_t vfsuid, kuid_t kuid)
+{
+ return __vfsuid_val(vfsuid) < __kuid_val(kuid);
+}
+
+static inline bool vfsgid_lt_kgid(vfsgid_t vfsgid, kgid_t kgid)
+{
+ return __vfsgid_val(vfsgid) < __kgid_val(kgid);
+}
+
/*
* vfs{g,u}ids are created from k{g,u}ids.
* We don't allow them to be created from regular {u,g}id.
@@ -208,13 +226,6 @@ static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns,
return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
}
-static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kuid_t kuid)
-{
- return AS_KUIDT(make_vfsuid(mnt_userns, fs_userns, kuid));
-}
-
/**
* make_vfsgid - map a filesystem kgid into a mnt_userns
* @mnt_userns: the mount's idmapping
@@ -253,13 +264,6 @@ static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns,
return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
}
-static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kgid_t kgid)
-{
- return AS_KGIDT(make_vfsgid(mnt_userns, fs_userns, kgid));
-}
-
/**
* from_vfsuid - map a vfsuid into the filesystem idmapping
* @mnt_userns: the mount's idmapping
@@ -288,33 +292,6 @@ static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns,
}
/**
- * mapped_kuid_user - map a user kuid into a mnt_userns
- * @mnt_userns: the mount's idmapping
- * @fs_userns: the filesystem's idmapping
- * @kuid : kuid to be mapped
- *
- * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this
- * function when preparing a @kuid to be written to disk or inode.
- *
- * If no_idmapping() determines that this is not an idmapped mount we can
- * simply return @kuid unchanged.
- * If initial_idmapping() tells us that the filesystem is not mounted with an
- * idmapping we know the value of @kuid won't change when calling
- * make_kuid() so we can simply retrieve the value via KUIDT_INIT()
- * directly.
- *
- * Return: @kuid mapped according to @mnt_userns.
- * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
- * returned.
- */
-static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kuid_t kuid)
-{
- return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid));
-}
-
-/**
* vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem
* @mnt_userns: the mount's idmapping
* @fs_userns: the filesystem's idmapping
@@ -333,6 +310,12 @@ static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns,
return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid));
}
+static inline bool vfsuid_has_mapping(struct user_namespace *userns,
+ vfsuid_t vfsuid)
+{
+ return from_kuid(userns, AS_KUIDT(vfsuid)) != (uid_t)-1;
+}
+
/**
* vfsuid_into_kuid - convert vfsuid into kuid
* @vfsuid: the vfsuid to convert
@@ -374,33 +357,6 @@ static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns,
}
/**
- * mapped_kgid_user - map a user kgid into a mnt_userns
- * @mnt_userns: the mount's idmapping
- * @fs_userns: the filesystem's idmapping
- * @kgid : kgid to be mapped
- *
- * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this
- * function when preparing a @kgid to be written to disk or inode.
- *
- * If no_idmapping() determines that this is not an idmapped mount we can
- * simply return @kgid unchanged.
- * If initial_idmapping() tells us that the filesystem is not mounted with an
- * idmapping we know the value of @kgid won't change when calling
- * make_kgid() so we can simply retrieve the value via KGIDT_INIT()
- * directly.
- *
- * Return: @kgid mapped according to @mnt_userns.
- * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
- * returned.
- */
-static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kgid_t kgid)
-{
- return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid));
-}
-
-/**
* vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem
* @mnt_userns: the mount's idmapping
* @fs_userns: the filesystem's idmapping
@@ -419,6 +375,12 @@ static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns,
return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid));
}
+static inline bool vfsgid_has_mapping(struct user_namespace *userns,
+ vfsgid_t vfsgid)
+{
+ return from_kgid(userns, AS_KGIDT(vfsgid)) != (gid_t)-1;
+}
+
/**
* vfsgid_into_kgid - convert vfsgid into kgid
* @vfsgid: the vfsgid to convert
diff --git a/include/linux/module.h b/include/linux/module.h
index 518296ea7f73..8c5909c0076c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -27,7 +27,6 @@
#include <linux/tracepoint-defs.h>
#include <linux/srcu.h>
#include <linux/static_call_types.h>
-#include <linux/cfi.h>
#include <linux/percpu.h>
#include <asm/module.h>
@@ -132,7 +131,7 @@ extern void cleanup_module(void);
{ return initfn; } \
int init_module(void) __copy(initfn) \
__attribute__((alias(#initfn))); \
- __CFI_ADDRESSABLE(init_module, __initdata);
+ ___ADDRESSABLE(init_module, __initdata);
/* This is only required if you want to be unloadable. */
#define module_exit(exitfn) \
@@ -140,7 +139,7 @@ extern void cleanup_module(void);
{ return exitfn; } \
void cleanup_module(void) __copy(exitfn) \
__attribute__((alias(#exitfn))); \
- __CFI_ADDRESSABLE(cleanup_module, __exitdata);
+ ___ADDRESSABLE(cleanup_module, __exitdata);
#endif
@@ -387,8 +386,9 @@ struct module {
const s32 *crcs;
unsigned int num_syms;
-#ifdef CONFIG_CFI_CLANG
- cfi_check_fn cfi_check;
+#ifdef CONFIG_ARCH_USES_CFI_TRAPS
+ s32 *kcfi_traps;
+ s32 *kcfi_traps_end;
#endif
/* Kernel parameters. */
@@ -827,7 +827,6 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr)
#ifdef CONFIG_SYSFS
extern struct kset *module_kset;
extern struct kobj_type module_ktype;
-extern int module_sysfs_initialized;
#endif /* CONFIG_SYSFS */
#define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x)
@@ -879,8 +878,17 @@ static inline bool module_sig_ok(struct module *module)
}
#endif /* CONFIG_MODULE_SIG */
+#if defined(CONFIG_MODULES) && defined(CONFIG_KALLSYMS)
int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
struct module *, unsigned long),
void *data);
+#else
+static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
+ struct module *, unsigned long),
+ void *data)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_MODULES && CONFIG_KALLSYMS */
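For illustration, a callback matching the declared signature could look like the sketch below; find_addr() and its context struct are invented names, and it assumes the usual kallsyms convention that a non-zero return stops the walk.

	struct find_addr_ctx {
		const char *name;
		unsigned long addr;
	};

	static int find_addr(void *data, const char *name,
			     struct module *mod, unsigned long addr)
	{
		struct find_addr_ctx *ctx = data;

		if (strcmp(name, ctx->name))
			return 0;	/* not ours, keep iterating */

		ctx->addr = addr;
		return 1;		/* non-zero return ends the walk */
	}

	/* usage: ret = module_kallsyms_on_each_symbol(find_addr, &ctx); */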
#endif /* _LINUX_MODULE_H */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 55a4abaf6715..62475996fac6 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -16,6 +16,7 @@
struct super_block;
struct dentry;
struct user_namespace;
+struct mnt_idmap;
struct file_system_type;
struct fs_context;
struct file;
@@ -70,13 +71,15 @@ struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
struct super_block *mnt_sb; /* pointer to superblock */
int mnt_flags;
- struct user_namespace *mnt_userns;
+ struct mnt_idmap *mnt_idmap;
} __randomize_layout;
-static inline struct user_namespace *mnt_user_ns(const struct vfsmount *mnt)
+struct user_namespace *mnt_user_ns(const struct vfsmount *mnt);
+struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap);
+static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
{
/* Pairs with smp_store_release() in do_idmap_mount(). */
- return smp_load_acquire(&mnt->mnt_userns);
+ return smp_load_acquire(&mnt->mnt_idmap);
}
extern int mnt_want_write(struct vfsmount *mnt);
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 6cbbfe94348c..80b8400ab8b2 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -17,7 +17,7 @@ static inline int ip_mroute_opt(int opt)
}
int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int);
-int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
+int ip_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t);
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
int ip_mr_init(void);
@@ -29,8 +29,8 @@ static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
return -ENOPROTOOPT;
}
-static inline int ip_mroute_getsockopt(struct sock *sock, int optname,
- char __user *optval, int __user *optlen)
+static inline int ip_mroute_getsockopt(struct sock *sk, int optname,
+ sockptr_t optval, sockptr_t optlen)
{
return -ENOPROTOOPT;
}
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index bc351a85ce9b..8f2b307fb124 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -27,7 +27,7 @@ struct sock;
#ifdef CONFIG_IPV6_MROUTE
extern int ip6_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int);
-extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
+extern int ip6_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t);
extern int ip6_mr_input(struct sk_buff *skb);
extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg);
extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
@@ -42,7 +42,7 @@ static inline int ip6_mroute_setsockopt(struct sock *sock, int optname,
static inline
int ip6_mroute_getsockopt(struct sock *sock,
- int optname, char __user *optval, int __user *optlen)
+ int optname, sockptr_t optval, sockptr_t optlen)
{
return -ENOPROTOOPT;
}
diff --git a/include/linux/msi.h b/include/linux/msi.h
index fc918a658d48..a112b913fff9 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -13,13 +13,20 @@
*
* Regular device drivers have no business with any of these functions and
* especially storing MSI descriptor pointers in random code is considered
- * abuse. The only function which is relevant for drivers is msi_get_virq().
+ * abuse.
+ *
+ * Device driver relevant functions are available in <linux/msi_api.h>
*/
+#include <linux/irqdomain_defs.h>
#include <linux/cpumask.h>
+#include <linux/msi_api.h>
#include <linux/xarray.h>
#include <linux/mutex.h>
#include <linux/list.h>
+#include <linux/irq.h>
+#include <linux/bits.h>
+
#include <asm/msi.h>
/* Dummy shadow structures if an architecture does not define them */
@@ -68,19 +75,18 @@ struct msi_msg {
extern int pci_msi_ignore_mask;
/* Helper functions */
-struct irq_data;
struct msi_desc;
struct pci_dev;
struct platform_msi_priv_data;
struct device_attribute;
+struct irq_domain;
+struct irq_affinity_desc;
void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
#ifdef CONFIG_GENERIC_MSI_IRQ
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
#else
-static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
-{
-}
+static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) { }
#endif
typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
@@ -120,6 +126,38 @@ struct pci_msi_desc {
};
};
+/**
+ * union msi_domain_cookie - Opaque MSI domain specific data
+ * @value: u64 value store
+ * @ptr: Pointer to domain specific data
+ * @iobase: Domain specific IOmem pointer
+ *
+ * The content of this data is implementation defined and used by the MSI
+ * domain to store domain specific information which is requried for
+ * interrupt chip callbacks.
+ */
+union msi_domain_cookie {
+ u64 value;
+ void *ptr;
+ void __iomem *iobase;
+};
+
+/**
+ * struct msi_desc_data - Generic MSI descriptor data
+ * @dcookie: Cookie for MSI domain specific data which is required
+ * for irq_chip callbacks
+ * @icookie: Cookie for the MSI interrupt instance provided by
+ * the usage site to the allocation function
+ *
+ * The content of this data is implementation defined, e.g. PCI/IMS
+ * implementations define the meaning of the data. The MSI core ignores
+ * this data completely.
+ */
+struct msi_desc_data {
+ union msi_domain_cookie dcookie;
+ union msi_instance_cookie icookie;
+};
+
#define MSI_MAX_INDEX ((unsigned int)USHRT_MAX)
/**
@@ -137,6 +175,7 @@ struct pci_msi_desc {
*
* @msi_index: Index of the msi descriptor
* @pci: PCI specific msi descriptor data
+ * @data: Generic MSI descriptor data
*/
struct msi_desc {
/* Shared device/bus type independent data */
@@ -156,7 +195,10 @@ struct msi_desc {
void *write_msi_msg_data;
u16 msi_index;
- struct pci_msi_desc pci;
+ union {
+ struct pci_msi_desc pci;
+ struct msi_desc_data data;
+ };
};
/*
@@ -171,33 +213,80 @@ enum msi_desc_filter {
MSI_DESC_ASSOCIATED,
};
+
+/**
+ * struct msi_dev_domain - The internals of MSI domain info per device
+ * @store: Xarray for storing MSI descriptor pointers
+ * @irqdomain: Pointer to a per device interrupt domain
+ */
+struct msi_dev_domain {
+ struct xarray store;
+ struct irq_domain *domain;
+};
+
/**
* msi_device_data - MSI per device data
* @properties: MSI properties which are interesting to drivers
* @platform_data: Platform-MSI specific data
* @mutex: Mutex protecting the MSI descriptor store
- * @__store: Xarray for storing MSI descriptor pointers
+ * @__domains: Internal data for per device MSI domains
* @__iter_idx: Index to search the next entry for iterators
*/
struct msi_device_data {
unsigned long properties;
struct platform_msi_priv_data *platform_data;
struct mutex mutex;
- struct xarray __store;
+ struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS];
unsigned long __iter_idx;
};
int msi_setup_device_data(struct device *dev);
-unsigned int msi_get_virq(struct device *dev, unsigned int index);
void msi_lock_descs(struct device *dev);
void msi_unlock_descs(struct device *dev);
-struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter);
-struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter);
+struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
+ enum msi_desc_filter filter);
/**
- * msi_for_each_desc - Iterate the MSI descriptors
+ * msi_first_desc - Get the first MSI descriptor of the default irqdomain
+ * @dev: Device to operate on
+ * @filter: Descriptor state filter
+ *
+ * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
+ * must be invoked before the call.
+ *
+ * Return: Pointer to the first MSI descriptor matching the search
+ * criteria, NULL if none found.
+ */
+static inline struct msi_desc *msi_first_desc(struct device *dev,
+ enum msi_desc_filter filter)
+{
+ return msi_domain_first_desc(dev, MSI_DEFAULT_DOMAIN, filter);
+}
+
+struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
+ enum msi_desc_filter filter);
+
+/**
+ * msi_domain_for_each_desc - Iterate the MSI descriptors in a specific domain
+ *
+ * @desc: struct msi_desc pointer used as iterator
+ * @dev: struct device pointer - device to iterate
+ * @domid: The id of the interrupt domain which should be walked.
+ * @filter: Filter for descriptor selection
+ *
+ * Notes:
+ * - The loop must be protected with a msi_lock_descs()/msi_unlock_descs()
+ * pair.
+ * - It is safe to remove a retrieved MSI descriptor in the loop.
+ */
+#define msi_domain_for_each_desc(desc, dev, domid, filter) \
+ for ((desc) = msi_domain_first_desc((dev), (domid), (filter)); (desc); \
+ (desc) = msi_next_desc((dev), (domid), (filter)))
+
+/**
+ * msi_for_each_desc - Iterate the MSI descriptors in the default irqdomain
*
* @desc: struct msi_desc pointer used as iterator
* @dev: struct device pointer - device to iterate
@@ -208,9 +297,8 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter);
* pair.
* - It is safe to remove a retrieved MSI descriptor in the loop.
*/
-#define msi_for_each_desc(desc, dev, filter) \
- for ((desc) = msi_first_desc((dev), (filter)); (desc); \
- (desc) = msi_next_desc((dev), (filter)))
+#define msi_for_each_desc(desc, dev, filter) \
+ msi_domain_for_each_desc((desc), (dev), MSI_DEFAULT_DOMAIN, (filter))
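A minimal usage sketch of the locking rule spelled out above (illustrative only, not part of this patch):

	static void dump_msi_mappings(struct device *dev)
	{
		struct msi_desc *desc;

		msi_lock_descs(dev);
		msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED)
			dev_info(dev, "MSI index %u -> Linux irq %u\n",
				 desc->msi_index, desc->irq);
		msi_unlock_descs(dev);
	}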
#define msi_desc_to_dev(desc) ((desc)->dev)
@@ -237,34 +325,47 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc,
}
#endif
-#ifdef CONFIG_PCI_MSI
-struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc);
-void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg);
-#else /* CONFIG_PCI_MSI */
-static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
+int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
+ struct msi_desc *init_desc);
+/**
+ * msi_insert_msi_desc - Allocate and initialize a MSI descriptor in the
+ * default irqdomain and insert it at @init_desc->msi_index
+ * @dev: Pointer to the device for which the descriptor is allocated
+ * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor
+ *
+ * Return: 0 on success or an appropriate failure code.
+ */
+static inline int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc)
{
+ return msi_domain_insert_msi_desc(dev, MSI_DEFAULT_DOMAIN, init_desc);
}
-#endif /* CONFIG_PCI_MSI */
-int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc);
-void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
- unsigned int first_index, unsigned int last_index);
+void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last);
+
+/**
+ * msi_free_msi_descs_range - Free a range of MSI descriptors of a device
+ * in the default irqdomain
+ *
+ * @dev: Device for which to free the descriptors
+ * @first: Index to start freeing from (inclusive)
+ * @last: Last index to be freed (inclusive)
+ */
+static inline void msi_free_msi_descs_range(struct device *dev, unsigned int first,
+ unsigned int last)
+{
+ msi_domain_free_msi_descs_range(dev, MSI_DEFAULT_DOMAIN, first, last);
+}
/**
- * msi_free_msi_descs - Free MSI descriptors of a device
+ * msi_free_msi_descs - Free all MSI descriptors of a device in the default irqdomain
* @dev: Device to free the descriptors
*/
static inline void msi_free_msi_descs(struct device *dev)
{
- msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX);
+ msi_free_msi_descs_range(dev, 0, MSI_MAX_INDEX);
}
-void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
-void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
-
-void pci_msi_mask_irq(struct irq_data *data);
-void pci_msi_unmask_irq(struct irq_data *data);
-
/*
* The arch hooks to set up msi irqs. Default functions are implemented
* as weak symbols so that they /can/ be overridden by architecture specific
@@ -293,7 +394,7 @@ static inline void msi_device_destroy_sysfs(struct device *dev) { }
*/
bool arch_restore_msi_irqs(struct pci_dev *dev);
-#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+#ifdef CONFIG_GENERIC_MSI_IRQ
#include <linux/irqhandler.h>
@@ -309,19 +410,22 @@ struct msi_domain_info;
* @get_hwirq: Retrieve the resulting hw irq number
* @msi_init: Domain specific init function for MSI interrupts
* @msi_free: Domain specific function to free an MSI interrupt
- * @msi_check: Callback for verification of the domain/info/dev data
* @msi_prepare: Prepare the allocation of the interrupts in the domain
+ * @prepare_desc: Optional function to prepare the allocated MSI descriptor
+ * in the domain
* @set_desc: Set the msi descriptor for an interrupt
* @domain_alloc_irqs: Optional function to override the default allocation
* function.
* @domain_free_irqs: Optional function to override the default free
* function.
+ * @msi_post_free: Optional function which is invoked after freeing
+ * all interrupts.
*
* @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying
* irqdomain.
*
- * @msi_check, @msi_prepare and @set_desc are callbacks used by
- * msi_domain_alloc/free_irqs().
+ * @msi_prepare, @prepare_desc and @set_desc are callbacks used by the
+ * msi_domain_alloc/free_irqs*() variants.
*
* @domain_alloc_irqs, @domain_free_irqs can be used to override the
* default allocation/free functions (__msi_domain_alloc/free_irqs). This
@@ -329,15 +433,6 @@ struct msi_domain_info;
* be wrapped into the regular irq domain concepts by mere mortals. This
* allows msi_domain_alloc/free_irqs to be used universally without having to
* special-case XEN all over the place.
- *
- * Contrary to other operations @domain_alloc_irqs and @domain_free_irqs
- * are set to the default implementation if NULL and even when
- * MSI_FLAG_USE_DEF_DOM_OPS is not set to avoid breaking existing users and
- * because these callbacks are obviously mandatory.
- *
- * This is NOT meant to be abused, but it can be useful to build wrappers
- * for specialized MSI irq domains which need extra work before and after
- * calling __msi_domain_alloc_irqs()/__msi_domain_free_irqs().
*/
struct msi_domain_ops {
irq_hw_number_t (*get_hwirq)(struct msi_domain_info *info,
@@ -349,23 +444,29 @@ struct msi_domain_ops {
void (*msi_free)(struct irq_domain *domain,
struct msi_domain_info *info,
unsigned int virq);
- int (*msi_check)(struct irq_domain *domain,
- struct msi_domain_info *info,
- struct device *dev);
int (*msi_prepare)(struct irq_domain *domain,
struct device *dev, int nvec,
msi_alloc_info_t *arg);
+ void (*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg,
+ struct msi_desc *desc);
void (*set_desc)(msi_alloc_info_t *arg,
struct msi_desc *desc);
int (*domain_alloc_irqs)(struct irq_domain *domain,
struct device *dev, int nvec);
void (*domain_free_irqs)(struct irq_domain *domain,
struct device *dev);
+ void (*msi_post_free)(struct irq_domain *domain,
+ struct device *dev);
};
/**
* struct msi_domain_info - MSI interrupt domain data
* @flags: Flags to describe features and capabilities
+ * @bus_token: The domain bus token
+ * @hwsize: The hardware table size or the software index limit.
+ * If 0 then the size is considered unlimited and
+ * gets initialized to the maximum software index limit
+ * by the domain creation code.
* @ops: The callback data structure
* @chip: Optional: associated interrupt chip
* @chip_data: Optional: associated interrupt chip data
@@ -375,17 +476,42 @@ struct msi_domain_ops {
* @data: Optional: domain specific data
*/
struct msi_domain_info {
- u32 flags;
- struct msi_domain_ops *ops;
- struct irq_chip *chip;
- void *chip_data;
- irq_flow_handler_t handler;
- void *handler_data;
- const char *handler_name;
- void *data;
+ u32 flags;
+ enum irq_domain_bus_token bus_token;
+ unsigned int hwsize;
+ struct msi_domain_ops *ops;
+ struct irq_chip *chip;
+ void *chip_data;
+ irq_flow_handler_t handler;
+ void *handler_data;
+ const char *handler_name;
+ void *data;
};
-/* Flags for msi_domain_info */
+/**
+ * struct msi_domain_template - Template for MSI device domains
+ * @name: Storage for the resulting name. Filled in by the core.
+ * @chip: Interrupt chip for this domain
+ * @ops: MSI domain ops
+ * @info: MSI domain info data
+ */
+struct msi_domain_template {
+ char name[48];
+ struct irq_chip chip;
+ struct msi_domain_ops ops;
+ struct msi_domain_info info;
+};
+
+/*
+ * Flags for msi_domain_info
+ *
+ * Bit 0-15: Generic MSI functionality which is not subject to restriction
+ * by parent domains
+ *
+ * Bit 16-31: Functionality which depends on the underlying parent domain and
+ * can be masked out by msi_parent_ops::init_dev_msi_info() when
+ * a device MSI domain is initialized.
+ */
enum {
/*
* Init non implemented ops callbacks with default MSI domain
@@ -397,44 +523,100 @@ enum {
* callbacks.
*/
MSI_FLAG_USE_DEF_CHIP_OPS = (1 << 1),
- /* Support multiple PCI MSI interrupts */
- MSI_FLAG_MULTI_PCI_MSI = (1 << 2),
- /* Support PCI MSIX interrupts */
- MSI_FLAG_PCI_MSIX = (1 << 3),
/* Needs early activate, required for PCI */
- MSI_FLAG_ACTIVATE_EARLY = (1 << 4),
+ MSI_FLAG_ACTIVATE_EARLY = (1 << 2),
/*
* Must reactivate when irq is started even when
* MSI_FLAG_ACTIVATE_EARLY has been set.
*/
- MSI_FLAG_MUST_REACTIVATE = (1 << 5),
- /* Is level-triggered capable, using two messages */
- MSI_FLAG_LEVEL_CAPABLE = (1 << 6),
+ MSI_FLAG_MUST_REACTIVATE = (1 << 3),
/* Populate sysfs on alloc() and destroy it on free() */
- MSI_FLAG_DEV_SYSFS = (1 << 7),
- /* MSI-X entries must be contiguous */
- MSI_FLAG_MSIX_CONTIGUOUS = (1 << 8),
+ MSI_FLAG_DEV_SYSFS = (1 << 4),
/* Allocate simple MSI descriptors */
- MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 9),
+ MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5),
/* Free MSI descriptors */
- MSI_FLAG_FREE_MSI_DESCS = (1 << 10),
+ MSI_FLAG_FREE_MSI_DESCS = (1 << 6),
+ /*
+ * Quirk to handle MSI implementations which do not provide
+ * masking. Currently known to affect x86, but has to be partially
+ * handled in the core MSI code.
+ */
+ MSI_FLAG_NOMASK_QUIRK = (1 << 7),
+
+ /* Mask for the generic functionality */
+ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0),
+
+ /* Mask for the domain specific functionality */
+ MSI_DOMAIN_FLAGS_MASK = GENMASK(31, 16),
+
+ /* Support multiple PCI MSI interrupts */
+ MSI_FLAG_MULTI_PCI_MSI = (1 << 16),
+ /* Support PCI MSIX interrupts */
+ MSI_FLAG_PCI_MSIX = (1 << 17),
+ /* Is level-triggered capable, using two messages */
+ MSI_FLAG_LEVEL_CAPABLE = (1 << 18),
+ /* MSI-X entries must be contiguous */
+ MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19),
+ /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */
+ MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20),
+ /* Support for PCI/IMS */
+ MSI_FLAG_PCI_IMS = (1 << 21),
};
+/**
+ * struct msi_parent_ops - MSI parent domain callbacks and configuration info
+ *
+ * @supported_flags: Required: The supported MSI flags of the parent domain
+ * @prefix: Optional: Prefix for the domain and chip name
+ * @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent
+ * domain specific domain flags, domain ops and interrupt chip
+ * callbacks when a per device domain is created.
+ */
+struct msi_parent_ops {
+ u32 supported_flags;
+ const char *prefix;
+ bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *msi_parent_domain,
+ struct msi_domain_info *msi_child_info);
+};
+
+bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *msi_parent_domain,
+ struct msi_domain_info *msi_child_info);
+
int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force);
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent);
-int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
- int nvec);
-int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev,
- int nvec);
-int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
- int nvec);
-void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev);
-void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev);
-void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev);
+
+bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
+ const struct msi_domain_template *template,
+ unsigned int hwsize, void *domain_data,
+ void *chip_data);
+void msi_remove_device_irq_domain(struct device *dev, unsigned int domid);
+
+bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
+ enum irq_domain_bus_token bus_token);
+
+int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last);
+int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last);
+int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs);
+
+struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
+ const struct irq_affinity_desc *affdesc,
+ union msi_instance_cookie *cookie);
+
+void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last);
+void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last);
+void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid);
+void msi_domain_free_irqs_all(struct device *dev, unsigned int domid);
+
struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain);
struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
@@ -467,20 +649,27 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nvec);
void *platform_msi_get_host_data(struct irq_domain *domain);
-#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
+#endif /* CONFIG_GENERIC_MSI_IRQ */
-#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+/* PCI specific interfaces */
+#ifdef CONFIG_PCI_MSI
+struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc);
+void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg);
+void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
+void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
+void pci_msi_mask_irq(struct irq_data *data);
+void pci_msi_unmask_irq(struct irq_data *data);
struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent);
u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev);
struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
-bool pci_dev_has_special_msi_domain(struct pci_dev *pdev);
-#else
+#else /* CONFIG_PCI_MSI */
static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
{
return NULL;
}
-#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
+static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) { }
+#endif /* !CONFIG_PCI_MSI */
#endif /* LINUX_MSI_H */
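/*
 * Usage sketch, illustrative only: allocating a small block of interrupts
 * from a device's default MSI domain with the range helpers declared above,
 * then releasing them again. The device pointer and the vector count are
 * hypothetical, and error handling is reduced to the essentials.
 */
#include <linux/msi.h>

static int example_setup_vectors(struct device *dev)
{
	/* Allocate MSI descriptors and interrupts for indices 0..3 inclusive. */
	int ret = msi_domain_alloc_irqs_range(dev, MSI_DEFAULT_DOMAIN, 0, 3);

	if (ret)
		return ret;

	/* ... look up Linux IRQ numbers via msi_get_virq() and request them ... */
	return 0;
}

static void example_teardown_vectors(struct device *dev)
{
	/* Free every interrupt owned by the default per-device domain. */
	msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
}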
diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h
new file mode 100644
index 000000000000..391087ad99b1
--- /dev/null
+++ b/include/linux/msi_api.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_MSI_API_H
+#define LINUX_MSI_API_H
+
+/*
+ * APIs which are relevant for device driver code for allocating and
+ * freeing MSI interrupts and querying the associations between
+ * hardware/software MSI indices and the Linux interrupt number.
+ */
+
+struct device;
+
+/*
+ * Per device interrupt domain related constants.
+ */
+enum msi_domain_ids {
+ MSI_DEFAULT_DOMAIN,
+ MSI_SECONDARY_DOMAIN,
+ MSI_MAX_DEVICE_IRQDOMAINS,
+};
+
+/**
+ * union msi_instance_cookie - MSI instance cookie
+ * @value: u64 value store
+ * @ptr: Pointer to usage site specific data
+ *
+ * This cookie is handed to the IMS allocation function and stored in the
+ * MSI descriptor for the interrupt chip callbacks.
+ *
+ * The content of this cookie is MSI domain implementation defined. For
+ * PCI/IMS implementations this could be a PASID or a pointer to queue
+ * memory.
+ */
+union msi_instance_cookie {
+ u64 value;
+ void *ptr;
+};
+
+/**
+ * msi_map - Mapping between MSI index and Linux interrupt number
+ * @index: The MSI index, e.g. a slot in the MSI-X table or
+ * a software-managed index if >= 0. If negative,
+ * the allocation function failed and @index contains
+ * the error code.
+ * @virq: The associated Linux interrupt number
+ */
+struct msi_map {
+ int index;
+ int virq;
+};
+
+/*
+ * Constant to be used for dynamic allocations when the allocation is any
+ * free MSI index, which is either an entry in a hardware table or a
+ * software managed index.
+ */
+#define MSI_ANY_INDEX UINT_MAX
+
+unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index);
+
+/**
+ * msi_get_virq - Lookup the Linux interrupt number for an MSI index on the default interrupt domain
+ * @dev: Device for which the lookup happens
+ * @index: The MSI index to lookup
+ *
+ * Return: The Linux interrupt number on success (> 0), 0 if not found
+ */
+static inline unsigned int msi_get_virq(struct device *dev, unsigned int index)
+{
+ return msi_domain_get_virq(dev, MSI_DEFAULT_DOMAIN, index);
+}
+
+#endif
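/*
 * Usage sketch, illustrative only: translating an MSI index into a Linux
 * interrupt number with msi_get_virq() and wiring up a handler. The handler
 * name, device pointer and index are hypothetical.
 */
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/msi_api.h>

static irqreturn_t example_handler(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int example_request_vector(struct device *dev, unsigned int index)
{
	unsigned int virq = msi_get_virq(dev, index);

	if (!virq)	/* 0 means nothing is mapped at @index */
		return -ENOENT;

	return request_irq(virq, example_handler, 0, "example", dev);
}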
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 955aee14b0f7..7c58c44662b8 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -40,6 +40,12 @@ struct mtd_erase_region_info {
unsigned long *lockmap; /* If keeping bitmap of locks */
};
+struct mtd_req_stats {
+ unsigned int uncorrectable_errors;
+ unsigned int corrected_bitflips;
+ unsigned int max_bitflips;
+};
+
/**
* struct mtd_oob_ops - oob operation operands
* @mode: operation mode
@@ -70,6 +76,7 @@ struct mtd_oob_ops {
uint32_t ooboffs;
uint8_t *datbuf;
uint8_t *oobbuf;
+ struct mtd_req_stats *stats;
};
/**
@@ -677,6 +684,7 @@ extern int mtd_device_unregister(struct mtd_info *master);
extern struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num);
extern int __get_mtd_device(struct mtd_info *mtd);
extern void __put_mtd_device(struct mtd_info *mtd);
+extern struct mtd_info *of_get_mtd_device_by_node(struct device_node *np);
extern struct mtd_info *get_mtd_device_nm(const char *name);
extern void put_mtd_device(struct mtd_info *mtd);
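/*
 * Usage sketch, illustrative only: collecting the new per-request ECC
 * statistics while reading through mtd_read_oob(). The mtd pointer, offset
 * and buffer are hypothetical; only the stats wiring matters here.
 */
#include <linux/mtd/mtd.h>

static int example_read_with_stats(struct mtd_info *mtd, loff_t from,
				   size_t len, u8 *buf)
{
	struct mtd_req_stats stats = { };
	struct mtd_oob_ops ops = {
		.mode	= MTD_OPS_AUTO_OOB,
		.len	= len,
		.datbuf	= buf,
		.stats	= &stats,
	};
	int ret = mtd_read_oob(mtd, from, &ops);

	pr_debug("corrected %u bitflips, %u uncorrectable errors\n",
		 stats.corrected_bitflips, stats.uncorrectable_errors);
	return ret;
}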
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c3693bb87b4c..b2996dc987ff 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -999,7 +999,6 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand,
bool nanddev_isbad(struct nand_device *nand, const struct nand_pos *pos);
bool nanddev_isreserved(struct nand_device *nand, const struct nand_pos *pos);
-int nanddev_erase(struct nand_device *nand, const struct nand_pos *pos);
int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos);
/* ECC related functions */
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 42218a1164f6..25765556223a 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -349,6 +349,8 @@ struct spi_nor_flash_parameter;
* @bouncebuf: bounce buffer used when the buffer passed by the MTD
* layer is not DMA-able
* @bouncebuf_size: size of the bounce buffer
+ * @id: The flash's ID bytes. Always contains
+ * SPI_NOR_MAX_ID_LEN bytes.
* @info: SPI NOR part JEDEC MFR ID and other info
* @manufacturer: SPI NOR manufacturer
* @addr_nbytes: number of address bytes
@@ -379,6 +381,7 @@ struct spi_nor {
struct spi_mem *spimem;
u8 *bouncebuf;
size_t bouncebuf_size;
+ u8 *id;
const struct flash_info *info;
const struct spi_nor_manufacturer *manufacturer;
u8 addr_nbytes;
diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 3682ae75c7aa..145169be2ed8 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -8,6 +8,7 @@
#include <linux/mbus.h>
#include <linux/if_ether.h>
+#include <linux/phy.h>
#define MV643XX_ETH_SHARED_NAME "mv643xx_eth"
#define MV643XX_ETH_NAME "mv643xx_eth_port"
@@ -59,6 +60,7 @@ struct mv643xx_eth_platform_data {
*/
int speed;
int duplex;
+ phy_interface_t interface;
/*
* How many RX/TX queues to use.
diff --git a/include/linux/namei.h b/include/linux/namei.h
index caeb08a98536..00fee52df842 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -83,7 +83,7 @@ extern int follow_up(struct path *);
extern struct dentry *lock_rename(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *);
-extern int __must_check nd_jump_link(struct path *path);
+extern int __must_check nd_jump_link(const struct path *path);
static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
{
diff --git a/include/linux/net.h b/include/linux/net.h
index 711c3593c3b8..b73ad8e3c212 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -41,6 +41,8 @@ struct net;
#define SOCK_NOSPACE 2
#define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4
+#define SOCK_SUPPORT_ZC 5
+#define SOCK_CUSTOM_SOCKOPT 6
#ifndef ARCH_HAS_SOCKET_TYPES
/**
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a3cb93c3dcc..aad12a179e54 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -78,6 +78,7 @@ struct xdp_buff;
void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops);
+void netdev_sw_irq_coalesce_default_on(struct net_device *dev);
/* Backlog congestion levels */
#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
@@ -171,31 +172,38 @@ static inline bool dev_xmit_complete(int rc)
* (unsigned long) so they can be read and written atomically.
*/
+#define NET_DEV_STAT(FIELD) \
+ union { \
+ unsigned long FIELD; \
+ atomic_long_t __##FIELD; \
+ }
+
struct net_device_stats {
- unsigned long rx_packets;
- unsigned long tx_packets;
- unsigned long rx_bytes;
- unsigned long tx_bytes;
- unsigned long rx_errors;
- unsigned long tx_errors;
- unsigned long rx_dropped;
- unsigned long tx_dropped;
- unsigned long multicast;
- unsigned long collisions;
- unsigned long rx_length_errors;
- unsigned long rx_over_errors;
- unsigned long rx_crc_errors;
- unsigned long rx_frame_errors;
- unsigned long rx_fifo_errors;
- unsigned long rx_missed_errors;
- unsigned long tx_aborted_errors;
- unsigned long tx_carrier_errors;
- unsigned long tx_fifo_errors;
- unsigned long tx_heartbeat_errors;
- unsigned long tx_window_errors;
- unsigned long rx_compressed;
- unsigned long tx_compressed;
+ NET_DEV_STAT(rx_packets);
+ NET_DEV_STAT(tx_packets);
+ NET_DEV_STAT(rx_bytes);
+ NET_DEV_STAT(tx_bytes);
+ NET_DEV_STAT(rx_errors);
+ NET_DEV_STAT(tx_errors);
+ NET_DEV_STAT(rx_dropped);
+ NET_DEV_STAT(tx_dropped);
+ NET_DEV_STAT(multicast);
+ NET_DEV_STAT(collisions);
+ NET_DEV_STAT(rx_length_errors);
+ NET_DEV_STAT(rx_over_errors);
+ NET_DEV_STAT(rx_crc_errors);
+ NET_DEV_STAT(rx_frame_errors);
+ NET_DEV_STAT(rx_fifo_errors);
+ NET_DEV_STAT(rx_missed_errors);
+ NET_DEV_STAT(tx_aborted_errors);
+ NET_DEV_STAT(tx_carrier_errors);
+ NET_DEV_STAT(tx_fifo_errors);
+ NET_DEV_STAT(tx_heartbeat_errors);
+ NET_DEV_STAT(tx_window_errors);
+ NET_DEV_STAT(rx_compressed);
+ NET_DEV_STAT(tx_compressed);
};
+#undef NET_DEV_STAT
/* per-cpu stats, allocated on demand.
* Try to fit them in a single cache line, for dev_get_stats() sake.
@@ -253,11 +261,17 @@ struct netdev_hw_addr_list {
#define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc)
#define netdev_for_each_uc_addr(ha, dev) \
netdev_hw_addr_list_for_each(ha, &(dev)->uc)
+#define netdev_for_each_synced_uc_addr(_ha, _dev) \
+ netdev_for_each_uc_addr((_ha), (_dev)) \
+ if ((_ha)->sync_cnt)
#define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc)
#define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc)
#define netdev_for_each_mc_addr(ha, dev) \
netdev_hw_addr_list_for_each(ha, &(dev)->mc)
+#define netdev_for_each_synced_mc_addr(_ha, _dev) \
+ netdev_for_each_mc_addr((_ha), (_dev)) \
+ if ((_ha)->sync_cnt)
struct hh_cache {
unsigned int hh_len;
@@ -546,8 +560,8 @@ static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n)
{
unsigned long val, new;
+ val = READ_ONCE(n->state);
do {
- val = READ_ONCE(n->state);
if (val & NAPIF_STATE_DISABLE)
return true;
@@ -555,7 +569,7 @@ static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n)
return false;
new = val | NAPIF_STATE_MISSED;
- } while (cmpxchg(&n->state, val, new) != val);
+ } while (!try_cmpxchg(&n->state, &val, new));
return true;
}
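/*
 * Illustrative pattern only: try_cmpxchg() reloads the current value into
 * 'val' when it fails, so the READ_ONCE() moves ahead of the loop and the
 * explicit "!= val" comparison disappears, exactly as in the NAPI helper
 * above. 'example_state' is a hypothetical variable.
 */
static unsigned long example_state;

static void example_set_flag(unsigned long flag)
{
	unsigned long val, new;

	val = READ_ONCE(example_state);
	do {
		new = val | flag;
	} while (!try_cmpxchg(&example_state, &val, new));
}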
@@ -640,9 +654,23 @@ extern int sysctl_devconf_inherit_init_net;
*/
static inline bool net_has_fallback_tunnels(const struct net *net)
{
- return !IS_ENABLED(CONFIG_SYSCTL) ||
- !sysctl_fb_tunnels_only_for_init_net ||
- (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1);
+#if IS_ENABLED(CONFIG_SYSCTL)
+ int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net);
+
+ return !fb_tunnels_only_for_init_net ||
+ (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1);
+#else
+ return true;
+#endif
+}
+
+static inline int net_inherit_devconf(void)
+{
+#if IS_ENABLED(CONFIG_SYSCTL)
+ return READ_ONCE(sysctl_devconf_inherit_init_net);
+#else
+ return 0;
+#endif
}
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -920,6 +948,7 @@ struct net_device_path_ctx {
};
enum tc_setup_type {
+ TC_QUERY_CAPS,
TC_SETUP_QDISC_MQPRIO,
TC_SETUP_CLSU32,
TC_SETUP_CLSFLOWER,
@@ -1012,6 +1041,10 @@ struct xfrmdev_ops {
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
+ void (*xdo_dev_state_update_curlft) (struct xfrm_state *x);
+ int (*xdo_dev_policy_add) (struct xfrm_policy *x);
+ void (*xdo_dev_policy_delete) (struct xfrm_policy *x);
+ void (*xdo_dev_policy_free) (struct xfrm_policy *x);
};
#endif
@@ -1345,10 +1378,6 @@ struct netdev_net_notifier {
* queue id bound to an AF_XDP socket. The flags field specifies if
* only RX, only Tx, or both should be woken up using the flags
* XDP_WAKEUP_RX and XDP_WAKEUP_TX.
- * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev);
- * Get devlink port instance associated with a given netdev.
- * Called with a reference on the netdevice and devlink locks only,
- * rtnl_lock is not held.
* int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
* int cmd);
* Add, change, delete or get information on an IPv4 tunnel.
@@ -1579,7 +1608,6 @@ struct net_device_ops {
struct xdp_buff *xdp);
int (*ndo_xsk_wakeup)(struct net_device *dev,
u32 queue_id, u32 flags);
- struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
int (*ndo_tunnel_ctl)(struct net_device *dev,
struct ip_tunnel_parm *p, int cmd);
struct net_device * (*ndo_get_peer_dev)(struct net_device *dev);
@@ -1634,7 +1662,7 @@ struct net_device_ops {
* @IFF_FAILOVER: device is a failover master device
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
* @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
- * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
+ * @IFF_NO_ADDRCONF: prevent ipv6 addrconf
* @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
* skb_headlen(skb) == 0 (data starts from frag0)
* @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN
@@ -1670,7 +1698,7 @@ enum netdev_priv_flags {
IFF_FAILOVER = 1<<27,
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
- IFF_LIVE_RENAME_OK = 1<<30,
+ IFF_NO_ADDRCONF = BIT_ULL(30),
IFF_TX_SKB_NO_LINEAR = BIT_ULL(31),
IFF_CHANGE_PROTO_DOWN = BIT_ULL(32),
};
@@ -1705,7 +1733,6 @@ enum netdev_priv_flags {
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
-#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK
#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR
/* Specifies the type of the struct net_device::ml_priv pointer */
@@ -1837,7 +1864,6 @@ enum netdev_ml_priv_type {
* @tipc_ptr: TIPC specific data
* @atalk_ptr: AppleTalk link
* @ip_ptr: IPv4 specific data
- * @dn_ptr: DECnet specific data
* @ip6_ptr: IPv6 specific data
* @ax25_ptr: AX.25 specific data
* @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
@@ -1979,6 +2005,11 @@ enum netdev_ml_priv_type {
* registered
* @offload_xstats_l3: L3 HW stats for this netdevice.
*
+ * @devlink_port: Pointer to related devlink port structure.
+ * Assigned by a driver before netdev registration using
+ * SET_NETDEV_DEVLINK_PORT macro. This pointer is static
+ * during the time netdevice is registered.
+ *
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
@@ -2133,9 +2164,6 @@ struct net_device {
#if IS_ENABLED(CONFIG_ATALK)
void *atalk_ptr;
#endif
-#if IS_ENABLED(CONFIG_DECNET)
- struct dn_dev __rcu *dn_ptr;
-#endif
#if IS_ENABLED(CONFIG_AX25)
void *ax25_ptr;
#endif
@@ -2332,9 +2360,22 @@ struct net_device {
netdevice_tracker watchdog_dev_tracker;
netdevice_tracker dev_registered_tracker;
struct rtnl_hw_stats64 *offload_xstats_l3;
+
+ struct devlink_port *devlink_port;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
+/*
+ * Driver should use this to assign devlink port instance to a netdevice
+ * before it registers the netdevice. Therefore devlink_port is static
+ * during the netdev lifetime after it is registered.
+ */
+#define SET_NETDEV_DEVLINK_PORT(dev, port) \
+({ \
+ WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \
+ ((dev)->devlink_port = (port)); \
+})
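/*
 * Usage sketch, illustrative only: a driver assigns its devlink port before
 * register_netdev(), as required by the comment above. The private structure
 * and registration flow are hypothetical.
 */
#include <linux/netdevice.h>
#include <net/devlink.h>

struct example_priv {
	struct net_device *netdev;
	struct devlink_port devlink_port;
};

static int example_register(struct example_priv *priv)
{
	/* Must run while the netdev is still NETREG_UNINITIALIZED. */
	SET_NETDEV_DEVLINK_PORT(priv->netdev, &priv->devlink_port);
	return register_netdev(priv->netdev);
}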
+
static inline bool netif_elide_gro(const struct net_device *dev)
{
if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
@@ -2537,16 +2578,15 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
* @dev: network device
* @napi: NAPI context
* @poll: polling function
- * @weight: default weight
*
* netif_napi_add() must be used to initialize a NAPI context prior to calling
* *any* of the other NAPI-related functions.
*/
static inline void
netif_napi_add(struct net_device *dev, struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int), int weight)
+ int (*poll)(struct napi_struct *, int))
{
- netif_napi_add_weight(dev, napi, poll, weight);
+ netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
}
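/*
 * Usage sketch, illustrative only: with the weight argument dropped, callers
 * pass just the poll callback and implicitly get NAPI_POLL_WEIGHT. The poll
 * function and its bookkeeping are hypothetical.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
	int work_done = 0;

	/* ... process up to 'budget' packets, counting them in work_done ... */
	if (work_done < budget)
		napi_complete_done(napi, work_done);
	return work_done;
}

static void example_init_napi(struct net_device *dev, struct napi_struct *napi)
{
	netif_napi_add(dev, napi, example_poll);
}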
static inline void
@@ -2559,8 +2599,6 @@ netif_napi_add_tx_weight(struct net_device *dev,
netif_napi_add_weight(dev, napi, poll, weight);
}
-#define netif_tx_napi_add netif_napi_add_tx_weight
-
/**
* netif_napi_add_tx() - initialize a NAPI context to be used for Tx only
* @dev: network device
@@ -2771,6 +2809,7 @@ enum netdev_cmd {
NETDEV_PRE_TYPE_CHANGE,
NETDEV_POST_TYPE_CHANGE,
NETDEV_POST_INIT,
+ NETDEV_PRE_UNINIT,
NETDEV_RELEASE,
NETDEV_NOTIFY_PEERS,
NETDEV_JOIN,
@@ -2800,6 +2839,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb);
int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb);
int unregister_netdevice_notifier_net(struct net *net,
struct notifier_block *nb);
+void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net,
+ struct notifier_block *nb);
int register_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn);
@@ -3100,7 +3141,6 @@ struct softnet_data {
/* stats */
unsigned int processed;
unsigned int time_squeeze;
- unsigned int received_rps;
#ifdef CONFIG_RPS
struct softnet_data *rps_ipi_list;
#endif
@@ -3133,6 +3173,7 @@ struct softnet_data {
unsigned int cpu;
unsigned int input_queue_tail;
#endif
+ unsigned int received_rps;
unsigned int dropped;
struct sk_buff_head input_pkt_queue;
struct napi_struct backlog;
@@ -3343,6 +3384,16 @@ static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_qu
#endif
}
+/**
+ * netdev_tx_sent_queue - report the number of bytes queued to a given tx queue
+ * @dev_queue: network device queue
+ * @bytes: number of bytes queued to the device queue
+ *
+ * Report the number of bytes queued for sending/completion to the network
+ * device hardware queue. @bytes should be a good approximation and should
+ * exactly match netdev_completed_queue() @bytes.
+ * This is typically called once per packet, from ndo_start_xmit().
+ */
static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
unsigned int bytes)
{
@@ -3388,13 +3439,14 @@ static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
}
/**
- * netdev_sent_queue - report the number of bytes queued to hardware
- * @dev: network device
- * @bytes: number of bytes queued to the hardware device queue
+ * netdev_sent_queue - report the number of bytes queued to hardware
+ * @dev: network device
+ * @bytes: number of bytes queued to the hardware device queue
*
- * Report the number of bytes queued for sending/completion to the network
- * device hardware queue. @bytes should be a good approximation and should
- * exactly match netdev_completed_queue() @bytes
+ * Report the number of bytes queued for sending/completion to the network
+ * device hardware queue #0. @bytes should be a good approximation and should
+ * exactly match netdev_completed_queue() @bytes.
+ * This is typically called once per packet, from ndo_start_xmit().
*/
static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
{
@@ -3409,6 +3461,15 @@ static inline bool __netdev_sent_queue(struct net_device *dev,
xmit_more);
}
+/**
+ * netdev_tx_completed_queue - report number of packets/bytes at TX completion.
+ * @dev_queue: network device queue
+ * @pkts: number of packets (currently ignored)
+ * @bytes: number of bytes dequeued from the device queue
+ *
+ * Must be called at most once per TX completion round (and not per
+ * individual packet), so that BQL can adjust its limits appropriately.
+ */
static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
unsigned int pkts, unsigned int bytes)
{
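/*
 * Usage sketch, illustrative only: the BQL pairing described above. Bytes
 * reported at transmit time with netdev_tx_sent_queue() are later matched by
 * one netdev_tx_completed_queue() call per completion round. The queue
 * selection and completion counters are hypothetical.
 */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

	netdev_tx_sent_queue(txq, skb->len);	/* once per packet */
	/* ... hand the skb to hardware ... */
	return NETDEV_TX_OK;
}

static void example_tx_clean(struct net_device *dev, unsigned int pkts,
			     unsigned int bytes)
{
	/* once per completion round, not per packet */
	netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
}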
@@ -3788,6 +3849,7 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
+void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);
@@ -3820,8 +3882,6 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
int dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
-void __dev_notify_flags(struct net_device *, unsigned int old_flags,
- unsigned int gchanges);
int dev_set_alias(struct net_device *, const char *, size_t);
int dev_get_alias(const struct net_device *, char *, size_t);
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
@@ -5066,11 +5126,6 @@ static inline const char *netdev_name(const struct net_device *dev)
return dev->name;
}
-static inline bool netdev_unregistering(const struct net_device *dev)
-{
- return dev->reg_state == NETREG_UNREGISTERING;
-}
-
static inline const char *netdev_reg_state(const struct net_device *dev)
{
switch (dev->reg_state) {
@@ -5129,4 +5184,9 @@ extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
extern struct net_device *blackhole_netdev;
+/* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. */
+#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD)
+#define DEV_STATS_ADD(DEV, FIELD, VAL) \
+ atomic_long_add((VAL), &(DEV)->stats.__##FIELD)
+
#endif /* _LINUX_NETDEVICE_H */
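/*
 * Usage sketch, illustrative only: the NET_DEV_STAT() union earlier in this
 * header lets the same counters be bumped atomically via the DEV_STATS_*
 * helpers, which is handy in paths that hold no locks. The device pointer is
 * hypothetical.
 */
static void example_count_rx_problems(struct net_device *dev, unsigned int errs)
{
	DEV_STATS_INC(dev, rx_dropped);
	DEV_STATS_ADD(dev, rx_errors, errs);
}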
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index c2c6f332fb90..d8817d381c14 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -243,11 +243,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
#endif
break;
-#if IS_ENABLED(CONFIG_DECNET)
- case NFPROTO_DECNET:
- hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
- break;
-#endif
default:
WARN_ON_ONCE(1);
break;
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ada1296c87d5..ab934ad951a8 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -515,6 +515,16 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
*skbinfo = ext->skbinfo;
}
+static inline void
+nf_inet_addr_mask_inplace(union nf_inet_addr *a1,
+ const union nf_inet_addr *mask)
+{
+ a1->all[0] &= mask->all[0];
+ a1->all[1] &= mask->all[1];
+ a1->all[2] &= mask->all[2];
+ a1->all[3] &= mask->all[3];
+}
+
#define IP_SET_INIT_KEXT(skb, opt, set) \
{ .bytes = (skb)->len, .packets = 1, .target = true,\
.timeout = ip_set_adt_opt_timeout(opt, set) }
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a13296d6c7ce..fd533552a062 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -94,10 +94,6 @@ struct ebt_table {
struct ebt_replace_kernel *table;
unsigned int valid_hooks;
rwlock_t lock;
- /* e.g. could be the table explicitly only allows certain
- * matches, targets, ... 0 == let it in */
- int (*check)(const struct ebt_table_info *info,
- unsigned int valid_hooks);
/* the data used by the kernel */
struct ebt_table_info *private;
struct nf_hook_ops *ops;
diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h
index 8dddfb151f00..a5f7bef1b3a4 100644
--- a/include/linux/netfilter_defs.h
+++ b/include/linux/netfilter_defs.h
@@ -7,14 +7,6 @@
/* in/out/forward only */
#define NF_ARP_NUMHOOKS 3
-/* max hook is NF_DN_ROUTE (6), also see uapi/linux/netfilter_decnet.h */
-#define NF_DN_NUMHOOKS 7
-
-#if IS_ENABLED(CONFIG_DECNET)
-/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */
-#define NF_MAX_HOOKS NF_DN_NUMHOOKS
-#else
#define NF_MAX_HOOKS NF_INET_NUMHOOKS
-#endif
#endif
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index f2402ddeafbf..4c76ddfb6a67 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -267,6 +267,14 @@ struct netfs_cache_ops {
loff_t *_start, size_t *_len, loff_t i_size,
bool no_space_allocated_yet);
+ /* Prepare an on-demand read operation, shortening it to a cached/uncached
+ * boundary as appropriate.
+ */
+ enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres,
+ loff_t start, size_t *_len,
+ loff_t i_size,
+ unsigned long *_flags, ino_t ino);
+
/* Query the occupancy of the cache in a region, returning where the
* next chunk of data starts and how long it is.
*/
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index bda1c385cffb..d81bde5a5844 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -64,6 +64,7 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
/* this can be increased when necessary - don't expose to userland */
#define NETLINK_MAX_COOKIE_LEN 20
+#define NETLINK_MAX_FMTMSG_LEN 80
/**
* struct netlink_ext_ack - netlink extended ACK report struct
@@ -71,22 +72,28 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
* %NL_SET_ERR_MSG
* @bad_attr: attribute with error
* @policy: policy for a bad attribute
+ * @miss_type: attribute type which was missing
+ * @miss_nest: nest missing an attribute (%NULL if missing top level attr)
* @cookie: cookie data to return to userspace (for success)
* @cookie_len: actual cookie data length
+ * @_msg_buf: output buffer for formatted message strings - don't access
+ * directly, use %NL_SET_ERR_MSG_FMT
*/
struct netlink_ext_ack {
const char *_msg;
const struct nlattr *bad_attr;
const struct nla_policy *policy;
+ const struct nlattr *miss_nest;
+ u16 miss_type;
u8 cookie[NETLINK_MAX_COOKIE_LEN];
u8 cookie_len;
+ char _msg_buf[NETLINK_MAX_FMTMSG_LEN];
};
/* Always use this macro, this allows later putting the
* message into a separate section or such for things
* like translation or listing all possible messages.
- * Currently string formatting is not supported (due
- * to the lack of an output buffer.)
+ * If string formatting is needed use NL_SET_ERR_MSG_FMT.
*/
#define NL_SET_ERR_MSG(extack, msg) do { \
static const char __msg[] = msg; \
@@ -98,9 +105,31 @@ struct netlink_ext_ack {
__extack->_msg = __msg; \
} while (0)
+/* We splice fmt with %s at each end even in the snprintf so that both calls
+ * can use the same string constant, avoiding its duplication in .ro
+ */
+#define NL_SET_ERR_MSG_FMT(extack, fmt, args...) do { \
+ struct netlink_ext_ack *__extack = (extack); \
+ \
+ if (!__extack) \
+ break; \
+ if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \
+ "%s" fmt "%s", "", ##args, "") >= \
+ NETLINK_MAX_FMTMSG_LEN) \
+ net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \
+ ##args, "\n"); \
+ \
+ do_trace_netlink_extack(__extack->_msg_buf); \
+ \
+ __extack->_msg = __extack->_msg_buf; \
+} while (0)
+
#define NL_SET_ERR_MSG_MOD(extack, msg) \
NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
+#define NL_SET_ERR_MSG_FMT_MOD(extack, fmt, args...) \
+ NL_SET_ERR_MSG_FMT((extack), KBUILD_MODNAME ": " fmt, ##args)
+
#define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \
if ((extack)) { \
(extack)->bad_attr = (attr); \
@@ -126,6 +155,26 @@ struct netlink_ext_ack {
#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \
NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg)
+#define NL_SET_ERR_ATTR_MISS(extack, nest, type) do { \
+ struct netlink_ext_ack *__extack = (extack); \
+ \
+ if (__extack) { \
+ __extack->miss_nest = (nest); \
+ __extack->miss_type = (type); \
+ } \
+} while (0)
+
+#define NL_REQ_ATTR_CHECK(extack, nest, tb, type) ({ \
+ struct nlattr **__tb = (tb); \
+ u32 __attr = (type); \
+ int __retval; \
+ \
+ __retval = !__tb[__attr]; \
+ if (__retval) \
+ NL_SET_ERR_ATTR_MISS((extack), (nest), __attr); \
+ __retval; \
+})
+
static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
u64 cookie)
{
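/*
 * Usage sketch, illustrative only: a netlink request handler using the new
 * missing-attribute and formatted-extack helpers. The attribute enum and the
 * limit are hypothetical.
 */
#include <linux/netlink.h>
#include <net/netlink.h>

enum {
	EXAMPLE_ATTR_UNSPEC,
	EXAMPLE_ATTR_VALUE,
	__EXAMPLE_ATTR_MAX,
};

static int example_nl_doit(struct nlattr **tb, struct netlink_ext_ack *extack)
{
	u32 val;

	if (NL_REQ_ATTR_CHECK(extack, NULL, tb, EXAMPLE_ATTR_VALUE))
		return -EINVAL;	/* extack now records the missing attribute */

	val = nla_get_u32(tb[EXAMPLE_ATTR_VALUE]);
	if (val > 100) {
		NL_SET_ERR_MSG_FMT(extack, "value %u exceeds limit of 100", val);
		return -ERANGE;
	}
	return 0;
}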
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 8d04b6a5964c..730003c4f4af 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -732,4 +732,17 @@ enum nfs4_setxattr_options {
SETXATTR4_CREATE = 1,
SETXATTR4_REPLACE = 2,
};
+
+enum {
+ RCA4_TYPE_MASK_RDATA_DLG = 0,
+ RCA4_TYPE_MASK_WDATA_DLG = 1,
+ RCA4_TYPE_MASK_DIR_DLG = 2,
+ RCA4_TYPE_MASK_FILE_LAYOUT = 3,
+ RCA4_TYPE_MASK_BLK_LAYOUT = 4,
+ RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8,
+ RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9,
+ RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12,
+ RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15,
+};
+
#endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b32ed68e7dc4..d92fdfd2444c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -59,6 +59,7 @@ struct nfs_access_entry {
kuid_t fsuid;
kgid_t fsgid;
struct group_info *group_info;
+ u64 timestamp;
__u32 mask;
struct rcu_head rcu_head;
};
@@ -83,7 +84,6 @@ struct nfs_open_context {
fmode_t mode;
unsigned long flags;
-#define NFS_CONTEXT_RESEND_WRITES (1)
#define NFS_CONTEXT_BAD (2)
#define NFS_CONTEXT_UNLOCK (3)
#define NFS_CONTEXT_FILE_OPEN (4)
@@ -182,6 +182,7 @@ struct nfs_inode {
/* Regular file */
struct {
atomic_long_t nrequests;
+ atomic_long_t redirtied_pages;
struct nfs_mds_commit_info commit_info;
struct mutex commit_mutex;
};
diff --git a/include/linux/node.h b/include/linux/node.h
index 40d641a8bfb0..427a5975cf40 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -2,15 +2,15 @@
/*
* include/linux/node.h - generic node definition
*
- * This is mainly for topological representation. We define the
- * basic 'struct node' here, which can be embedded in per-arch
+ * This is mainly for topological representation. We define the
+ * basic 'struct node' here, which can be embedded in per-arch
* definitions of processors.
*
* Basic handling of the devices is done in drivers/base/node.c
- * and system devices are handled in drivers/base/sys.c.
+ * and system devices are handled in drivers/base/sys.c.
*
* Nodes are exported via driverfs in the class/node/devices/
- * directory.
+ * directory.
*/
#ifndef _LINUX_NODE_H_
#define _LINUX_NODE_H_
@@ -18,7 +18,6 @@
#include <linux/device.h>
#include <linux/cpumask.h>
#include <linux/list.h>
-#include <linux/workqueue.h>
/**
* struct node_hmem_attrs - heterogeneous memory performance attributes
@@ -84,10 +83,6 @@ static inline void node_set_perf_attrs(unsigned int nid,
struct node {
struct device dev;
struct list_head access_list;
-
-#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
- struct work_struct node_work;
-#endif
#ifdef CONFIG_HMEM_REPORTING
struct list_head cache_attrs;
struct device *cache_dev;
@@ -96,7 +91,6 @@ struct node {
struct memory_block;
extern struct node *node_devices[];
-typedef void (*node_registration_func_t)(struct node *);
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA)
void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
@@ -144,11 +138,6 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
unsigned access);
-
-#ifdef CONFIG_HUGETLBFS
-extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
- node_registration_func_t unregister);
-#endif
#else
static inline void node_dev_init(void)
{
@@ -176,18 +165,8 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
}
-
-static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
- node_registration_func_t unreg)
-{
-}
#endif
#define to_node(device) container_of(device, struct node, dev)
-static inline bool node_is_toptier(int node)
-{
- return node_state(node, N_CPU);
-}
-
#endif /* _LINUX_NODE_H_ */
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 4b71a96190a8..bb0ee80526b2 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -493,6 +493,7 @@ static inline int num_node_state(enum node_states state)
#define first_online_node 0
#define first_memory_node 0
#define next_online_node(nid) (MAX_NUMNODES)
+#define next_memory_node(nid) (MAX_NUMNODES)
#define nr_node_ids 1U
#define nr_online_nodes 1U
@@ -504,12 +505,20 @@ static inline int num_node_state(enum node_states state)
static inline int node_random(const nodemask_t *maskp)
{
#if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1)
- int w, bit = NUMA_NO_NODE;
+ int w, bit;
w = nodes_weight(*maskp);
- if (w)
- bit = bitmap_ord_to_pos(maskp->bits,
- get_random_int() % w, MAX_NUMNODES);
+ switch (w) {
+ case 0:
+ bit = NUMA_NO_NODE;
+ break;
+ case 1:
+ bit = first_node(*maskp);
+ break;
+ default:
+ bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_u32_below(w));
+ break;
+ }
return bit;
#else
return 0;
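/*
 * Usage sketch, illustrative only: picking a random node that currently has
 * memory, relying on the single-node fast path added above. Assumes a
 * NUMA-enabled build where node_states[] is available.
 */
#include <linux/nodemask.h>
#include <linux/topology.h>

static int example_pick_node(void)
{
	int nid = node_random(&node_states[N_MEMORY]);

	return nid == NUMA_NO_NODE ? numa_node_id() : nid;
}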
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cdb171efc7cb..fee881cded01 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -94,6 +94,7 @@ static inline struct cred *nsset_cred(struct nsset *set)
int copy_namespaces(unsigned long flags, struct task_struct *tsk);
void exit_task_namespaces(struct task_struct *tsk);
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
+int exec_task_namespaces(void);
void free_nsproxy(struct nsproxy *ns);
int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
struct cred *, struct fs_struct *);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index ae53d74f3696..d6be2a686100 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -797,6 +797,7 @@ enum nvme_opcode {
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d,
+ nvme_cmd_vendor_start = 0x80,
};
#define nvme_opcode_name(opcode) { opcode, #opcode }
@@ -963,6 +964,7 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
NVME_RW_DTYPE_STREAMS = 1 << 4,
+ NVME_WZ_DEAC = 1 << 9,
};
struct nvme_dsm_cmd {
@@ -1482,8 +1484,8 @@ struct nvmf_connect_command {
};
enum {
- NVME_CONNECT_AUTHREQ_ASCR = (1 << 2),
- NVME_CONNECT_AUTHREQ_ATR = (1 << 1),
+ NVME_CONNECT_AUTHREQ_ASCR = (1U << 18),
+ NVME_CONNECT_AUTHREQ_ATR = (1U << 17),
};
struct nvmf_connect_data {
diff --git a/include/linux/of.h b/include/linux/of.h
index 766d002bddb9..8b9f94386dc3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -342,7 +342,7 @@ extern int of_property_read_string_helper(const struct device_node *np,
const char **out_strs, size_t sz, int index);
extern int of_device_is_compatible(const struct device_node *device,
const char *);
-extern int of_device_compatible_match(struct device_node *device,
+extern int of_device_compatible_match(const struct device_node *device,
const char *const *compat);
extern bool of_device_is_available(const struct device_node *device);
extern bool of_device_is_big_endian(const struct device_node *device);
@@ -562,7 +562,7 @@ static inline int of_device_is_compatible(const struct device_node *device,
return 0;
}
-static inline int of_device_compatible_match(struct device_node *device,
+static inline int of_device_compatible_match(const struct device_node *device,
const char *const *compat)
{
return 0;
@@ -1549,9 +1549,9 @@ enum of_overlay_notify_action {
OF_OVERLAY_POST_REMOVE,
};
-static inline char *of_overlay_action_name(enum of_overlay_notify_action action)
+static inline const char *of_overlay_action_name(enum of_overlay_notify_action action)
{
- static char *of_overlay_action_name[] = {
+ static const char *const of_overlay_action_name[] = {
"init",
"pre-apply",
"post-apply",
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 45598dbec269..265f26eeaf6b 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -154,4 +154,15 @@ static inline const __be32 *of_get_pci_address(struct device_node *dev, int bar_
return __of_get_address(dev, -1, bar_no, size, flags);
}
+static inline int of_address_count(struct device_node *np)
+{
+ struct resource res;
+ int count = 0;
+
+ while (of_address_to_resource(np, count, &res) == 0)
+ count++;
+
+ return count;
+}
+
#endif /* __OF_ADDRESS_H */
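/*
 * Usage sketch, illustrative only: sizing a loop with the new
 * of_address_count() helper before walking a node's "reg" entries. The
 * device_node pointer is hypothetical.
 */
static int example_walk_regions(struct device_node *np)
{
	int i, nr = of_address_count(np);

	for (i = 0; i < nr; i++) {
		struct resource res;

		if (of_address_to_resource(np, i, &res))
			break;
		/* ... ioremap or claim 'res' as needed ... */
	}
	return nr;
}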
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 1d7992a02e36..1a803e4335d3 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -101,8 +101,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
}
static inline int of_dma_configure_id(struct device *dev,
- struct device_node *np,
- bool force_dma)
+ struct device_node *np,
+ bool force_dma,
+ const u32 *id)
{
return 0;
}
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 83fccd0c9bba..d6d3eae2f145 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -37,9 +37,8 @@ extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
extern int of_irq_to_resource(struct device_node *dev, int index,
struct resource *r);
-extern void of_irq_init(const struct of_device_id *matches);
-
#ifdef CONFIG_OF_IRQ
+extern void of_irq_init(const struct of_device_id *matches);
extern int of_irq_parse_one(struct device_node *device, int index,
struct of_phandle_args *out_irq);
extern int of_irq_count(struct device_node *dev);
@@ -57,6 +56,9 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
extern void of_msi_configure(struct device *dev, struct device_node *np);
u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in);
#else
+static inline void of_irq_init(const struct of_device_id *matches)
+{
+}
static inline int of_irq_parse_one(struct device_node *device, int index,
struct of_phandle_args *out_irq)
{
diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index 0484b613ca64..d88715a0b3a5 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -14,6 +14,7 @@
struct net_device;
extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface);
extern int of_get_mac_address(struct device_node *np, u8 *mac);
+extern int of_get_mac_address_nvmem(struct device_node *np, u8 *mac);
int of_get_ethdev_address(struct device_node *np, struct net_device *dev);
extern struct net_device *of_find_net_device_by_node(struct device_node *np);
#else
@@ -28,6 +29,11 @@ static inline int of_get_mac_address(struct device_node *np, u8 *mac)
return -ENODEV;
}
+static inline int of_get_mac_address_nvmem(struct device_node *np, u8 *mac)
+{
+ return -ENODEV;
+}
+
static inline int of_get_ethdev_address(struct device_node *np, struct net_device *dev)
{
return -ENODEV;
diff --git a/include/linux/once.h b/include/linux/once.h
index b14d8b309d52..bc714d414448 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -5,10 +5,18 @@
#include <linux/types.h>
#include <linux/jump_label.h>
+/* Helpers used from arbitrary contexts.
+ * Hard irqs are blocked, be cautious.
+ */
bool __do_once_start(bool *done, unsigned long *flags);
void __do_once_done(bool *done, struct static_key_true *once_key,
unsigned long *flags, struct module *mod);
+/* Variant for process contexts only. */
+bool __do_once_sleepable_start(bool *done);
+void __do_once_sleepable_done(bool *done, struct static_key_true *once_key,
+ struct module *mod);
+
/* Call a function exactly once. The idea of DO_ONCE() is to perform
* a function call such as initialization of random seeds, etc, only
* once, where DO_ONCE() can live in the fast-path. After @func has
@@ -52,7 +60,27 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
___ret; \
})
+/* Variant of DO_ONCE() for process/sleepable contexts. */
+#define DO_ONCE_SLEEPABLE(func, ...) \
+ ({ \
+ bool ___ret = false; \
+ static bool __section(".data.once") ___done = false; \
+ static DEFINE_STATIC_KEY_TRUE(___once_key); \
+ if (static_branch_unlikely(&___once_key)) { \
+ ___ret = __do_once_sleepable_start(&___done); \
+ if (unlikely(___ret)) { \
+ func(__VA_ARGS__); \
+ __do_once_sleepable_done(&___done, &___once_key,\
+ THIS_MODULE); \
+ } \
+ } \
+ ___ret; \
+ })
+
#define get_random_once(buf, nbytes) \
DO_ONCE(get_random_bytes, (buf), (nbytes))
+#define get_random_sleepable_once(buf, nbytes) \
+ DO_ONCE_SLEEPABLE(get_random_bytes, (buf), (nbytes))
+
#endif /* _LINUX_ONCE_H */
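/*
 * Usage sketch, illustrative only: seeding a key exactly once from a context
 * that may sleep, using the new sleepable variant instead of DO_ONCE(). The
 * key buffer is hypothetical.
 */
static u8 example_key[16];

static void example_init_key(void)
{
	/* May block; only call from process context. */
	get_random_sleepable_once(example_key, sizeof(example_key));
}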
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 02d1e7bbd8cd..7d0c9c48a0c5 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -78,15 +78,6 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk)
}
/*
- * Use this helper if tsk->mm != mm and the victim mm needs a special
- * handling. This is guaranteed to stay true after once set.
- */
-static inline bool mm_is_oom_victim(struct mm_struct *mm)
-{
- return test_bit(MMF_OOM_VICTIM, &mm->flags);
-}
-
-/*
* Checks whether a page fault on the given mm is still reliable.
* This is no longer true if the oom reaper started to reap the
* address space which is reflected by MMF_UNSTABLE flag set in
@@ -106,8 +97,6 @@ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm)
return 0;
}
-bool __oom_reap_task_mm(struct mm_struct *mm);
-
long oom_badness(struct task_struct *p,
unsigned long totalpages);
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index f1221d11f8e5..0e33b5cbdb9f 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -30,7 +30,6 @@
* https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
* credit to Christian Biere.
*/
-#define is_signed_type(type) (((type)(-1)) < (type)1)
#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
#define type_min(T) ((T)((T)-type_max(T)-(T)1))
@@ -52,58 +51,69 @@ static inline bool __must_check __must_check_overflow(bool overflow)
return unlikely(overflow);
}
-/*
- * For simplicity and code hygiene, the fallback code below insists on
- * a, b and *d having the same type (similar to the min() and max()
- * macros), whereas gcc's type-generic overflow checkers accept
- * different types. Hence we don't just make check_add_overflow an
- * alias for __builtin_add_overflow, but add type checks similar to
- * below.
- */
-#define check_add_overflow(a, b, d) __must_check_overflow(({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- __builtin_add_overflow(__a, __b, __d); \
-}))
-
-#define check_sub_overflow(a, b, d) __must_check_overflow(({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- __builtin_sub_overflow(__a, __b, __d); \
-}))
+/**
+ * check_add_overflow() - Calculate addition with overflow checking
+ * @a: first addend
+ * @b: second addend
+ * @d: pointer to store sum
+ *
+ * Returns 0 on success.
+ *
+ * *@d holds the results of the attempted addition, but is not considered
+ * "safe for use" on a non-zero return value, which indicates that the
+ * sum has overflowed or been truncated.
+ */
+#define check_add_overflow(a, b, d) \
+ __must_check_overflow(__builtin_add_overflow(a, b, d))
-#define check_mul_overflow(a, b, d) __must_check_overflow(({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- __builtin_mul_overflow(__a, __b, __d); \
-}))
+/**
+ * check_sub_overflow() - Calculate subtraction with overflow checking
+ * @a: minuend; value to subtract from
+ * @b: subtrahend; value to subtract from @a
+ * @d: pointer to store difference
+ *
+ * Returns 0 on success.
+ *
+ * *@d holds the results of the attempted subtraction, but is not considered
+ * "safe for use" on a non-zero return value, which indicates that the
+ * difference has underflowed or been truncated.
+ */
+#define check_sub_overflow(a, b, d) \
+ __must_check_overflow(__builtin_sub_overflow(a, b, d))
-/** check_shl_overflow() - Calculate a left-shifted value and check overflow
+/**
+ * check_mul_overflow() - Calculate multiplication with overflow checking
+ * @a: first factor
+ * @b: second factor
+ * @d: pointer to store product
+ *
+ * Returns 0 on success.
*
+ * *@d holds the results of the attempted multiplication, but is not
+ * considered "safe for use" on a non-zero return value, which indicates
+ * that the product has overflowed or been truncated.
+ */
+#define check_mul_overflow(a, b, d) \
+ __must_check_overflow(__builtin_mul_overflow(a, b, d))
+
+/**
+ * check_shl_overflow() - Calculate a left-shifted value and check overflow
* @a: Value to be shifted
* @s: How many bits left to shift
* @d: Pointer to where to store the result
*
* Computes *@d = (@a << @s)
*
- * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
+ * Returns true if '*@d' cannot hold the result or when '@a << @s' doesn't
* make sense. Example conditions:
- * - 'a << s' causes bits to be lost when stored in *d.
- * - 's' is garbage (e.g. negative) or so large that the result of
- * 'a << s' is guaranteed to be 0.
- * - 'a' is negative.
- * - 'a << s' sets the sign bit, if any, in '*d'.
*
- * '*d' will hold the results of the attempted shift, but is not
+ * - '@a << @s' causes bits to be lost when stored in *@d.
+ * - '@s' is garbage (e.g. negative) or so large that the result of
+ * '@a << @s' is guaranteed to be 0.
+ * - '@a' is negative.
+ * - '@a << @s' sets the sign bit, if any, in '*@d'.
+ *
+ * '*@d' will hold the results of the attempted shift, but is not
* considered "safe for use" if true is returned.
*/
#define check_shl_overflow(a, s, d) __must_check_overflow(({ \
@@ -118,9 +128,55 @@ static inline bool __must_check __must_check_overflow(bool overflow)
(*_d >> _to_shift) != _a); \
}))
+#define __overflows_type_constexpr(x, T) ( \
+ is_unsigned_type(typeof(x)) ? \
+ (x) > type_max(typeof(T)) : \
+ is_unsigned_type(typeof(T)) ? \
+ (x) < 0 || (x) > type_max(typeof(T)) : \
+ (x) < type_min(typeof(T)) || (x) > type_max(typeof(T)))
+
+#define __overflows_type(x, T) ({ \
+ typeof(T) v = 0; \
+ check_add_overflow((x), v, &v); \
+})
+
/**
- * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
+ * overflows_type - helper for checking the overflows between value, variables,
+ * or data type
+ *
+ * @n: source constant value or variable to be checked
+ * @T: destination variable or data type proposed to store @n
+ *
+ * Compares the @n expression for whether or not it can safely fit in
+ * the storage of the type in @T. @n and @T can have different types.
+ * If @n is a constant expression, this will also resolve to a constant
+ * expression.
+ *
+ * Returns: true if overflow can occur, false otherwise.
+ */
+#define overflows_type(n, T) \
+ __builtin_choose_expr(__is_constexpr(n), \
+ __overflows_type_constexpr(n, T), \
+ __overflows_type(n, T))
+
+/**
+ * castable_to_type - like __same_type(), but also allows for casted literals
*
+ * @n: variable or constant value
+ * @T: variable or data type
+ *
+ * Unlike the __same_type() macro, this allows a constant value as the
+ * first argument. If this value would not overflow into an assignment
+ * of the second argument's type, it returns true. Otherwise, this falls
+ * back to __same_type().
+ */
+#define castable_to_type(n, T) \
+ __builtin_choose_expr(__is_constexpr(n), \
+ !__overflows_type_constexpr(n, T), \
+ __same_type(n, T))
+
+/**
+ * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
* @factor1: first factor
* @factor2: second factor
*
@@ -140,7 +196,6 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
/**
* size_add() - Calculate size_t addition with saturation at SIZE_MAX
- *
* @addend1: first addend
* @addend2: second addend
*
@@ -160,7 +215,6 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2)
/**
* size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX
- *
* @minuend: value to subtract from
* @subtrahend: value to subtract from @minuend
*
@@ -183,7 +237,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
/**
* array_size() - Calculate size of 2-dimensional array.
- *
* @a: dimension one
* @b: dimension two
*
@@ -196,7 +249,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
/**
* array3_size() - Calculate size of 3-dimensional array.
- *
* @a: dimension one
* @b: dimension two
* @c: dimension three
@@ -211,7 +263,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
/**
* flex_array_size() - Calculate size of a flexible array member
* within an enclosing structure.
- *
* @p: Pointer to the structure.
* @member: Name of the flexible array member.
* @count: Number of elements in the array.
@@ -228,7 +279,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
/**
* struct_size() - Calculate size of structure with trailing flexible array.
- *
* @p: Pointer to the structure.
* @member: Name of the array member.
* @count: Number of elements in the array.
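/*
 * Usage sketch, illustrative only: pairing check_mul_overflow() with
 * overflows_type() before trusting externally supplied sizes. The element
 * size, count and the 32-bit destination field are hypothetical.
 */
#include <linux/overflow.h>

static int example_total_bytes(size_t count, size_t elem_size, size_t *out)
{
	size_t total;

	if (check_mul_overflow(count, elem_size, &total))
		return -EOVERFLOW;
	if (overflows_type(total, u32))
		return -EOVERFLOW;	/* must also fit a 32-bit length field */

	*out = total;
	return 0;
}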
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index ef1e3e736e14..7d79818dc065 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -55,7 +55,8 @@
#define SECTIONS_WIDTH 0
#endif
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
+ <= BITS_PER_LONG - NR_PAGEFLAGS
#define NODES_WIDTH NODES_SHIFT
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
#error "Vmemmap: No space for nodes field in page flags"
@@ -89,8 +90,8 @@
#define LAST_CPUPID_SHIFT 0
#endif
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
- <= BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
+ KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
#else
#define LAST_CPUPID_WIDTH 0
@@ -100,10 +101,15 @@
#define LAST_CPUPID_NOT_IN_PAGE_FLAGS
#endif
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
- > BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
+ KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
#error "Not enough bits in page flags"
#endif
+/* see the comment on MAX_NR_TIERS */
+#define LRU_REFS_WIDTH min(__LRU_REFS_WIDTH, BITS_PER_LONG - NR_PAGEFLAGS - \
+ ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \
+ NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH)
+
#endif
#endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 465ff35a8c00..9aec9fd8c50b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -176,9 +176,6 @@ enum pageflags {
/* SLOB */
PG_slob_free = PG_private,
- /* Compound pages. Stored in first tail page's flags */
- PG_double_map = PG_workingset,
-
#ifdef CONFIG_MEMORY_FAILURE
/*
* Compound pages. Stored in first tail page's flags.
@@ -641,7 +638,7 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
* Different with flags above, this flag is used only for fsdax mode. It
* indicates that this page->mapping is now under reflink case.
*/
-#define PAGE_MAPPING_DAX_COW 0x1
+#define PAGE_MAPPING_DAX_SHARED ((void *)0x1)
static __always_inline bool folio_mapping_flags(struct folio *folio)
{
@@ -874,29 +871,11 @@ static inline int PageTransTail(struct page *page)
{
return PageTail(page);
}
-
-/*
- * PageDoubleMap indicates that the compound page is mapped with PTEs as well
- * as PMDs.
- *
- * This is required for optimization of rmap operations for THP: we can postpone
- * per small page mapcount accounting (and its overhead from atomic operations)
- * until the first PMD split.
- *
- * For the page PageDoubleMap means ->_mapcount in all sub-pages is offset up
- * by one. This reference will go away with last compound_mapcount.
- *
- * See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap().
- */
-PAGEFLAG(DoubleMap, double_map, PF_SECOND)
- TESTSCFLAG(DoubleMap, double_map, PF_SECOND)
#else
TESTPAGEFLAG_FALSE(TransHuge, transhuge)
TESTPAGEFLAG_FALSE(TransCompound, transcompound)
TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap)
TESTPAGEFLAG_FALSE(TransTail, transtail)
-PAGEFLAG_FALSE(DoubleMap, double_map)
- TESTSCFLAG_FALSE(DoubleMap, double_map)
#endif
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
@@ -1058,7 +1037,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
1UL << PG_private | 1UL << PG_private_2 | \
1UL << PG_writeback | 1UL << PG_reserved | \
1UL << PG_slab | 1UL << PG_active | \
- 1UL << PG_unevictable | __PG_MLOCKED)
+ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
/*
* Flags checked when a page is prepped for return by the page allocator.
@@ -1069,7 +1048,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
* alloc-free cycle to prevent from reusing the page.
*/
#define PAGE_FLAGS_CHECK_AT_PREP \
- (PAGEFLAGS_MASK & ~__PG_HWPOISON)
+ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 679591301994..c141ea9a95ef 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -3,15 +3,17 @@
#define _LINUX_PAGE_COUNTER_H
#include <linux/atomic.h>
+#include <linux/cache.h>
#include <linux/kernel.h>
#include <asm/page.h>
struct page_counter {
+ /*
+ * Make sure 'usage' does not share a cacheline with any other field. The
+ * memcg->memory.usage is a hot member of struct mem_cgroup.
+ */
atomic_long_t usage;
- unsigned long min;
- unsigned long low;
- unsigned long high;
- unsigned long max;
+ CACHELINE_PADDING(_pad1_);
/* effective memory.min and memory.min usage tracking */
unsigned long emin;
@@ -23,18 +25,18 @@ struct page_counter {
atomic_long_t low_usage;
atomic_long_t children_low_usage;
- /* legacy */
unsigned long watermark;
unsigned long failcnt;
- /*
- * 'parent' is placed here to be far from 'usage' to reduce
- * cache false sharing, as 'usage' is written mostly while
- * parent is frequently read for cgroup's hierarchical
- * counting nature.
- */
+ /* Keep all the read-mostly fields in a separate cacheline. */
+ CACHELINE_PADDING(_pad2_);
+
+ unsigned long min;
+ unsigned long low;
+ unsigned long high;
+ unsigned long max;
struct page_counter *parent;
-};
+} ____cacheline_internodealigned_in_smp;
#if BITS_PER_LONG == 32
#define PAGE_COUNTER_MAX LONG_MAX
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index fabb2e1e087f..22be4582faae 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -36,9 +36,15 @@ struct page_ext {
unsigned long flags;
};
+extern bool early_page_ext;
extern unsigned long page_ext_size;
extern void pgdat_page_ext_init(struct pglist_data *pgdat);
+static inline bool early_page_ext_enabled(void)
+{
+ return early_page_ext;
+}
+
#ifdef CONFIG_SPARSEMEM
static inline void page_ext_init_flatmem(void)
{
@@ -55,7 +61,8 @@ static inline void page_ext_init(void)
}
#endif
-struct page_ext *lookup_page_ext(const struct page *page);
+extern struct page_ext *page_ext_get(struct page *page);
+extern void page_ext_put(struct page_ext *page_ext);
static inline struct page_ext *page_ext_next(struct page_ext *curr)
{
@@ -67,13 +74,13 @@ static inline struct page_ext *page_ext_next(struct page_ext *curr)
#else /* !CONFIG_PAGE_EXTENSION */
struct page_ext;
-static inline void pgdat_page_ext_init(struct pglist_data *pgdat)
+static inline bool early_page_ext_enabled(void)
{
+ return false;
}
-static inline struct page_ext *lookup_page_ext(const struct page *page)
+static inline void pgdat_page_ext_init(struct pglist_data *pgdat)
{
- return NULL;
}
static inline void page_ext_init(void)
@@ -87,5 +94,14 @@ static inline void page_ext_init_flatmem_late(void)
static inline void page_ext_init_flatmem(void)
{
}
+
+static inline struct page_ext *page_ext_get(struct page *page)
+{
+ return NULL;
+}
+
+static inline void page_ext_put(struct page_ext *page_ext)
+{
+}
#endif /* CONFIG_PAGE_EXTENSION */
#endif /* __LINUX_PAGE_EXT_H */
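A minimal usage sketch, not part of the patch: with lookup_page_ext() replaced by the get/put pair above, every page_ext access is expected to be bracketed so the backing table stays valid for the duration of the access. The helper name below is hypothetical; the page_idle.h conversion that follows shows the same shape in-tree.

/* Illustrative only: test a page_ext flag under the new get/put discipline. */
static bool my_page_ext_test_flag(struct page *page, int flag_nr)
{
	struct page_ext *page_ext = page_ext_get(page);
	bool ret;

	if (unlikely(!page_ext))
		return false;

	ret = test_bit(flag_nr, &page_ext->flags);
	page_ext_put(page_ext);	/* must pair with page_ext_get() */

	return ret;
}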
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index 4663dfed1293..5cb7bd2078ec 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -13,65 +13,79 @@
* If there is not enough space to store Idle and Young bits in page flags, use
* page ext flags instead.
*/
-
static inline bool folio_test_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(&folio->page);
+ struct page_ext *page_ext = page_ext_get(&folio->page);
+ bool page_young;
if (unlikely(!page_ext))
return false;
- return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
+ page_young = test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
+ page_ext_put(page_ext);
+
+ return page_young;
}
static inline void folio_set_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(&folio->page);
+ struct page_ext *page_ext = page_ext_get(&folio->page);
if (unlikely(!page_ext))
return;
set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
+ page_ext_put(page_ext);
}
static inline bool folio_test_clear_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(&folio->page);
+ struct page_ext *page_ext = page_ext_get(&folio->page);
+ bool page_young;
if (unlikely(!page_ext))
return false;
- return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
+ page_young = test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
+ page_ext_put(page_ext);
+
+ return page_young;
}
static inline bool folio_test_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(&folio->page);
+ struct page_ext *page_ext = page_ext_get(&folio->page);
+ bool page_idle;
if (unlikely(!page_ext))
return false;
- return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
+ page_idle = test_bit(PAGE_EXT_IDLE, &page_ext->flags);
+ page_ext_put(page_ext);
+
+ return page_idle;
}
static inline void folio_set_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(&folio->page);
+ struct page_ext *page_ext = page_ext_get(&folio->page);
if (unlikely(!page_ext))
return;
set_bit(PAGE_EXT_IDLE, &page_ext->flags);
+ page_ext_put(page_ext);
}
static inline void folio_clear_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(&folio->page);
+ struct page_ext *page_ext = page_ext_get(&folio->page);
if (unlikely(!page_ext))
return;
clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
+ page_ext_put(page_ext);
}
#endif /* !CONFIG_64BIT */
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 83c7248053a1..5f1ae07d724b 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -53,6 +53,10 @@ extern unsigned int pageblock_order;
#endif /* CONFIG_HUGETLB_PAGE */
#define pageblock_nr_pages (1UL << pageblock_order)
+#define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages)
+#define pageblock_aligned(pfn) IS_ALIGNED((pfn), pageblock_nr_pages)
+#define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages)
+#define pageblock_end_pfn(pfn) ALIGN((pfn) + 1, pageblock_nr_pages)
/* Forward declaration */
struct page;
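A minimal sketch of how the new pageblock helpers compose; the walker function is hypothetical, not from the patch.

/* Walk [start_pfn, end_pfn) one pageblock at a time. */
static void walk_pageblocks(unsigned long start_pfn, unsigned long end_pfn)
{
	/* pageblock_start_pfn() rounds down to the owning pageblock */
	unsigned long pfn = pageblock_start_pfn(start_pfn);

	for (; pfn < end_pfn; pfn += pageblock_nr_pages) {
		/*
		 * pfn is pageblock-aligned here (pageblock_aligned(pfn) is
		 * true) and pageblock_end_pfn(pfn) is the first pfn of the
		 * next block.
		 */
	}
}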
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0178b2040ea3..29e1f9e76eb6 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -504,9 +504,8 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_NOFS 0x00000010
#define FGP_NOWAIT 0x00000020
#define FGP_FOR_MMAP 0x00000040
-#define FGP_HEAD 0x00000080
-#define FGP_ENTRY 0x00000100
-#define FGP_STABLE 0x00000200
+#define FGP_ENTRY 0x00000080
+#define FGP_STABLE 0x00000100
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp);
@@ -718,8 +717,8 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch);
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
- unsigned int nr_pages, struct page **pages);
+unsigned filemap_get_folios_contig(struct address_space *mapping,
+ pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
struct page **pages);
@@ -989,19 +988,16 @@ static inline int lock_page_killable(struct page *page)
}
/*
- * lock_page_or_retry - Lock the page, unless this would block and the
+ * folio_lock_or_retry - Lock the folio, unless this would block and the
* caller indicated that it can handle a retry.
*
* Return value and mmap_lock implications depend on flags; see
* __folio_lock_or_retry().
*/
-static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm,
- unsigned int flags)
+static inline bool folio_lock_or_retry(struct folio *folio,
+ struct mm_struct *mm, unsigned int flags)
{
- struct folio *folio;
might_sleep();
-
- folio = page_folio(page);
return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
}
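A sketch of the typical fault-path caller, assuming the usual VM_FAULT_RETRY convention; the helper name is invented and not taken from this patch.

static vm_fault_t my_lock_folio(struct folio *folio, struct vm_fault *vmf)
{
	/*
	 * On failure, mmap_lock may already have been dropped depending on
	 * vmf->flags; see __folio_lock_or_retry().
	 */
	if (!folio_lock_or_retry(folio, vmf->vma->vm_mm, vmf->flags))
		return VM_FAULT_RETRY;

	/* ... folio is locked here ... */
	folio_unlock(folio);
	return 0;
}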
@@ -1042,7 +1038,6 @@ static inline int wait_on_page_locked_killable(struct page *page)
return folio_wait_locked_killable(page_folio(page));
}
-int folio_put_wait_locked(struct folio *folio, int state);
void wait_on_page_writeback(struct page *page);
void folio_wait_writeback(struct folio *folio);
int folio_wait_writeback_killable(struct folio *folio);
@@ -1106,12 +1101,10 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
int filemap_add_folio(struct address_space *mapping, struct folio *folio,
pgoff_t index, gfp_t gfp);
void filemap_remove_folio(struct folio *folio);
-void delete_from_page_cache(struct page *page);
void __filemap_remove_folio(struct folio *folio, void *shadow);
-void replace_page_cache_page(struct page *old, struct page *new);
+void replace_page_cache_folio(struct folio *old, struct folio *new);
void delete_from_page_cache_batch(struct address_space *mapping,
struct folio_batch *fbatch);
-int try_to_release_page(struct page *page, gfp_t gfp);
bool filemap_release_folio(struct folio *folio, gfp_t gfp);
loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
int whence);
@@ -1173,6 +1166,8 @@ struct readahead_control {
pgoff_t _index;
unsigned int _nr_pages;
unsigned int _batch_count;
+ bool _workingset;
+ unsigned long _pflags;
};
#define DEFINE_READAHEAD(ractl, f, r, m, i) \
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index ac7b38ad5903..959f52e5867d 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -15,18 +15,20 @@ struct mm_walk;
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
* split_huge_page() instead of handling it explicitly.
- * @pte_entry: if set, called for each non-empty PTE (lowest-level)
- * entry
+ * @pte_entry: if set, called for each PTE (lowest-level) entry,
+ * including empty ones
* @pte_hole: if set, called for each hole at all levels,
- * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD
- * 4:PTE. Any folded depths (where PTRS_PER_P?D is equal
- * to 1) are skipped.
+ * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD.
+ * Any folded depths (where PTRS_PER_P?D is equal to 1)
+ * are skipped.
* @hugetlb_entry: if set, called for each hugetlb entry
* @test_walk: caller specific callback function to determine whether
* we walk over the current vma or not. Returning 0 means
* "do page table walk over the current vma", returning
* a negative value means "abort current page table walk
* right now" and returning 1 means "skip the current vma"
+ * Note that this callback is not called when the caller
+ * passes in a single VMA as for walk_page_vma().
* @pre_vma: if set, called before starting walk on a non-null vma.
* @post_vma: if set, called after a walk on a non-null vma, provided
* that @pre_vma and the vma walk succeeded.
@@ -99,6 +101,9 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
pgd_t *pgd,
void *private);
+int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, const struct mm_walk_ops *ops,
+ void *private);
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
void *private);
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
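A sketch of the new single-VMA entry point; the counting walker is hypothetical. Per the updated kernel-doc above, ->test_walk is not invoked for single-VMA walks and ->pte_entry now sees empty entries as well.

static int count_pte(pte_t *pte, unsigned long addr, unsigned long next,
		     struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (pte_present(*pte))
		(*count)++;
	return 0;
}

static const struct mm_walk_ops count_ops = {
	.pte_entry = count_pte,
};

static unsigned long count_present_ptes(struct vm_area_struct *vma)
{
	unsigned long count = 0;

	/* the caller is assumed to hold mmap_lock for vma->vm_mm */
	walk_page_range_vma(vma, vma->vm_start, vma->vm_end, &count_ops, &count);
	return count;
}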
diff --git a/include/linux/panic.h b/include/linux/panic.h
index c7759b3f2045..979b776e3bcb 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -11,6 +11,7 @@ extern long (*panic_blink)(int state);
__printf(1, 2)
void panic(const char *fmt, ...) __noreturn __cold;
void nmi_panic(struct pt_regs *regs, const char *msg);
+void check_panic_on_warn(const char *origin);
extern void oops_enter(void);
extern void oops_exit(void);
extern bool oops_may_print(void);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 060af91bafcd..adffd65e84b4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -38,6 +38,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/resource_ext.h>
+#include <linux/msi_api.h>
#include <uapi/linux/pci.h>
#include <linux/pci_ids.h>
@@ -409,6 +410,7 @@ struct pci_dev {
*/
unsigned int irq;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+ struct resource driver_exclusive_resource; /* driver exclusive resource ranges */
bool match_driver; /* Skip attaching driver */
@@ -475,6 +477,7 @@ struct pci_dev {
unsigned int broken_cmd_compl:1; /* No compl for some cmds */
#endif
#ifdef CONFIG_PCIE_PTM
+ u16 ptm_cap; /* PTM Capability */
unsigned int ptm_root:1;
unsigned int ptm_enabled:1;
u8 ptm_granularity;
@@ -842,6 +845,9 @@ struct pci_error_handlers {
/* Device driver may resume normal operations */
void (*resume)(struct pci_dev *dev);
+
+ /* Allow device driver to record more details of a correctable error */
+ void (*cor_error_detected)(struct pci_dev *dev);
};
@@ -1406,6 +1412,21 @@ int pci_request_selected_regions(struct pci_dev *, int, const char *);
int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
void pci_release_selected_regions(struct pci_dev *, int);
+static inline __must_check struct resource *
+pci_request_config_region_exclusive(struct pci_dev *pdev, unsigned int offset,
+ unsigned int len, const char *name)
+{
+ return __request_region(&pdev->driver_exclusive_resource, offset, len,
+ name, IORESOURCE_EXCLUSIVE);
+}
+
+static inline void pci_release_config_region(struct pci_dev *pdev,
+ unsigned int offset,
+ unsigned int len)
+{
+ __release_region(&pdev->driver_exclusive_resource, offset, len);
+}
+
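A sketch of how a driver might claim part of config space with the new helpers; the capability offset, length and name are placeholders, not from the patch.

static int my_claim_cap(struct pci_dev *pdev, unsigned int cap_off,
			unsigned int cap_len)
{
	struct resource *res;

	res = pci_request_config_region_exclusive(pdev, cap_off, cap_len,
						  "my-cap");
	if (!res)
		return -EBUSY;

	/* config accesses in [cap_off, cap_off + cap_len) are now owned here */
	return 0;
}

The matching pci_release_config_region(pdev, cap_off, cap_len) undoes the claim.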
/* drivers/pci/bus.c */
void pci_add_resource(struct list_head *resources, struct resource *res);
void pci_add_resource_offset(struct list_head *resources, struct resource *res,
@@ -1552,10 +1573,17 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
return rc;
return 0;
}
+int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+ unsigned int max_vecs, unsigned int flags);
int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
unsigned int max_vecs, unsigned int flags,
struct irq_affinity *affd);
+bool pci_msix_can_alloc_dyn(struct pci_dev *dev);
+struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index,
+ const struct irq_affinity_desc *affdesc);
+void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map);
+
void pci_free_irq_vectors(struct pci_dev *dev);
int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
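A generic sketch of the pci_alloc_irq_vectors() path declared above; the vector count and handler are arbitrary choices for illustration.

static int my_setup_irqs(struct pci_dev *pdev)
{
	int nvec, irq;

	/* try MSI-X first, then MSI, then the legacy INTx line */
	nvec = pci_alloc_irq_vectors(pdev, 1, 8,
				     PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
	if (nvec < 0)
		return nvec;

	irq = pci_irq_vector(pdev, 0);
	/* ... request_irq(irq, handler, 0, "mydev", priv) ... */
	return 0;
}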
@@ -1585,6 +1613,13 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
return 1;
return -ENOSPC;
}
+static inline int
+pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+ unsigned int max_vecs, unsigned int flags)
+{
+ return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs,
+ flags, NULL);
+}
static inline void pci_free_irq_vectors(struct pci_dev *dev)
{
@@ -1677,10 +1712,12 @@ bool pci_ats_disabled(void);
#ifdef CONFIG_PCIE_PTM
int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
+void pci_disable_ptm(struct pci_dev *dev);
bool pcie_ptm_enabled(struct pci_dev *dev);
#else
static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
{ return -EINVAL; }
+static inline void pci_disable_ptm(struct pci_dev *dev) { }
static inline bool pcie_ptm_enabled(struct pci_dev *dev)
{ return false; }
#endif
@@ -1723,6 +1760,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
{ return 0; }
#endif
int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent);
#endif
/* Some architectures require additional setup to direct VGA traffic */
@@ -1895,15 +1933,13 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
{
return -ENOSPC;
}
-#endif /* CONFIG_PCI */
-
static inline int
pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
unsigned int max_vecs, unsigned int flags)
{
- return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
- NULL);
+ return -ENOSPC;
}
+#endif /* CONFIG_PCI */
/* Include architecture-dependent settings and functions */
@@ -2019,8 +2055,8 @@ enum pci_fixup_pass {
#ifdef CONFIG_LTO_CLANG
#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
class_shift, hook, stub) \
- void __cficanonical stub(struct pci_dev *dev); \
- void __cficanonical stub(struct pci_dev *dev) \
+ void stub(struct pci_dev *dev); \
+ void stub(struct pci_dev *dev) \
{ \
hook(dev); \
} \
@@ -2471,6 +2507,14 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev)
void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type);
#endif
+struct msi_domain_template;
+
+bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template,
+ unsigned int hwsize, void *data);
+struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie,
+ const struct irq_affinity_desc *affdesc);
+void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map);
+
#include <linux/dma-mapping.h>
#define pci_printk(level, pdev, fmt, arg...) \
@@ -2481,6 +2525,7 @@ void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type);
#define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg)
#define pci_err(pdev, fmt, arg...) dev_err(&(pdev)->dev, fmt, ##arg)
#define pci_warn(pdev, fmt, arg...) dev_warn(&(pdev)->dev, fmt, ##arg)
+#define pci_warn_once(pdev, fmt, arg...) dev_warn_once(&(pdev)->dev, fmt, ##arg)
#define pci_notice(pdev, fmt, arg...) dev_notice(&(pdev)->dev, fmt, ##arg)
#define pci_info(pdev, fmt, arg...) dev_info(&(pdev)->dev, fmt, ##arg)
#define pci_dbg(pdev, fmt, arg...) dev_dbg(&(pdev)->dev, fmt, ##arg)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6feade66efdb..b362d90eb9b0 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -75,6 +75,9 @@
#define PCI_CLASS_COMMUNICATION_MODEM 0x0703
#define PCI_CLASS_COMMUNICATION_OTHER 0x0780
+/* Interface for SERIAL/MODEM */
+#define PCI_SERIAL_16550_COMPATIBLE 0x02
+
#define PCI_BASE_CLASS_SYSTEM 0x08
#define PCI_CLASS_SYSTEM_PIC 0x0800
#define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010
@@ -2079,6 +2082,9 @@
#define PCI_DEVICE_ID_ICE_1712 0x1712
#define PCI_DEVICE_ID_VT1724 0x1724
+#define PCI_VENDOR_ID_MICROSOFT 0x1414
+#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353
+
#define PCI_VENDOR_ID_OXSEMI 0x1415
#define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403
#define PCI_DEVICE_ID_OXSEMI_PCIe840 0xC000
diff --git a/include/linux/pcs-altera-tse.h b/include/linux/pcs-altera-tse.h
new file mode 100644
index 000000000000..92ab9f08e835
--- /dev/null
+++ b/include/linux/pcs-altera-tse.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Bootlin
+ *
+ * Maxime Chevallier <maxime.chevallier@bootlin.com>
+ */
+
+#ifndef __LINUX_PCS_ALTERA_TSE_H
+#define __LINUX_PCS_ALTERA_TSE_H
+
+struct phylink_pcs;
+struct net_device;
+
+struct phylink_pcs *alt_tse_pcs_create(struct net_device *ndev,
+ void __iomem *pcs_base, int reg_width);
+
+#endif /* __LINUX_PCS_ALTERA_TSE_H */
diff --git a/include/linux/pe.h b/include/linux/pe.h
index daf09ffffe38..6ffabf1e6d03 100644
--- a/include/linux/pe.h
+++ b/include/linux/pe.h
@@ -29,7 +29,14 @@
* handover_offset and xloadflags fields in the bootparams structure.
*/
#define LINUX_EFISTUB_MAJOR_VERSION 0x1
-#define LINUX_EFISTUB_MINOR_VERSION 0x0
+#define LINUX_EFISTUB_MINOR_VERSION 0x1
+
+/*
+ * LINUX_PE_MAGIC appears at offset 0x38 into the MS-DOS header of EFI bootable
+ * Linux kernel images that target the architecture as specified by the PE/COFF
+ * header machine type field.
+ */
+#define LINUX_PE_MAGIC 0x818223cd
#define MZ_MAGIC 0x5a4d /* "MZ" */
@@ -65,6 +72,8 @@
#define IMAGE_FILE_MACHINE_SH5 0x01a8
#define IMAGE_FILE_MACHINE_THUMB 0x01c2
#define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169
+#define IMAGE_FILE_MACHINE_LOONGARCH32 0x6232
+#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264
/* flags */
#define IMAGE_FILE_RELOCS_STRIPPED 0x0001
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 5fda40f97fe9..36b942b67b7d 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -121,9 +121,15 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
preempt_enable();
}
+extern bool percpu_is_read_locked(struct percpu_rw_semaphore *);
extern void percpu_down_write(struct percpu_rw_semaphore *);
extern void percpu_up_write(struct percpu_rw_semaphore *);
+static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem)
+{
+ return atomic_read(&sem->block);
+}
+
extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
const char *, struct lock_class_key *);
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index f1ec5ad1351c..1338ea2aa720 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -37,12 +37,11 @@
/*
* Percpu allocator can serve percpu allocations before slab is
* initialized which allows slab to depend on the percpu allocator.
- * The following two parameters decide how much resource to
- * preallocate for this. Keep PERCPU_DYNAMIC_RESERVE equal to or
- * larger than PERCPU_DYNAMIC_EARLY_SIZE.
+ * The following parameter decides how much resource to preallocate
+ * for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than
+ * PERCPU_DYNAMIC_EARLY_SIZE.
*/
-#define PERCPU_DYNAMIC_EARLY_SLOTS 128
-#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10)
+#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10)
/*
* PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 01861eebed79..a3aae8d57a42 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -13,7 +13,9 @@
#include <linux/threads.h>
#include <linux/percpu.h>
#include <linux/types.h>
-#include <linux/gfp.h>
+
+/* percpu_counter batch for local add or sub */
+#define PERCPU_COUNTER_LOCAL_BATCH INT_MAX
#ifdef CONFIG_SMP
@@ -43,6 +45,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
+s64 percpu_counter_sum_all(struct percpu_counter *fbc);
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
void percpu_counter_sync(struct percpu_counter *fbc);
@@ -56,6 +59,22 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
}
+/*
+ * With percpu_counter_add_local() and percpu_counter_sub_local(), counts
+ * are accumulated in the local per-cpu counter and not in fbc->count until
+ * the local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter
+ * writes efficient.
+ * But percpu_counter_sum(), instead of percpu_counter_read(), needs to be
+ * used to add up the counts from each CPU to account for all the local
+ * counts. So percpu_counter_add_local() and percpu_counter_sub_local()
+ * should be used when a counter is updated frequently and read rarely.
+ */
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
+}
+
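A sketch of the intended usage pattern for the new local variants; the counter name is hypothetical and percpu_counter_init() is assumed to have been called during setup.

static struct percpu_counter my_stat;

static void my_stat_inc(void)
{
	/* stays in the per-cpu delta until it exceeds PERCPU_COUNTER_LOCAL_BATCH */
	percpu_counter_add_local(&my_stat, 1);
}

static s64 my_stat_read(void)
{
	/* percpu_counter_read() could miss almost all of the local deltas */
	return percpu_counter_sum(&my_stat);
}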
static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
{
s64 ret = __percpu_counter_sum(fbc);
@@ -138,6 +157,13 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
preempt_enable();
}
+/* non-SMP percpu_counter_add_local is the same with percpu_counter_add */
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_add(fbc, amount);
+}
+
static inline void
percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
@@ -168,6 +194,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
return percpu_counter_read(fbc);
}
+static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc)
+{
+ return percpu_counter_read(fbc);
+}
+
static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
{
return true;
@@ -193,4 +224,10 @@ static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
percpu_counter_add(fbc, -amount);
}
+static inline void
+percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_add_local(fbc, -amount);
+}
+
#endif /* _LINUX_PERCPU_COUNTER_H */
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 0407a38b470a..ef914a600087 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -24,10 +24,11 @@
/*
* ARM PMU hw_event flags
*/
-/* Event uses a 64bit counter */
-#define ARMPMU_EVT_64BIT 1
-/* Event uses a 47bit counter */
-#define ARMPMU_EVT_47BIT 2
+#define ARMPMU_EVT_64BIT 0x00001 /* Event uses a 64bit counter */
+#define ARMPMU_EVT_47BIT 0x00002 /* Event uses a 47bit counter */
+
+static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT);
+static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT);
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x
@@ -99,7 +100,7 @@ struct arm_pmu {
void (*stop)(struct arm_pmu *);
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
- int (*filter_match)(struct perf_event *event);
+ bool (*filter)(struct pmu *pmu, int cpu);
int num_events;
bool secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
@@ -173,7 +174,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu);
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
-struct arm_pmu *armpmu_alloc_atomic(void);
void armpmu_free(struct arm_pmu *pmu);
int armpmu_register(struct arm_pmu *pmu);
int armpmu_request_irq(int irq, int cpu);
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index bf66fe011fa8..e17e86ad6f3a 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -45,7 +45,7 @@ struct riscv_pmu {
irqreturn_t (*handle_irq)(int irq_num, void *dev);
- int num_counters;
+ unsigned long cmask;
u64 (*ctr_read)(struct perf_event *event);
int (*ctr_get_idx)(struct perf_event *event);
int (*ctr_get_width)(int idx);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ee8b9ecdc03b..c6a3bac76966 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -36,6 +36,7 @@ struct perf_guest_info_callbacks {
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <linux/rhashtable-types.h>
#include <asm/hw_breakpoint.h>
#endif
@@ -60,6 +61,7 @@ struct perf_guest_info_callbacks {
#include <linux/refcount.h>
#include <linux/security.h>
#include <linux/static_call.h>
+#include <linux/lockdep.h>
#include <asm/local.h>
struct perf_callchain_entry {
@@ -137,9 +139,11 @@ struct hw_perf_event_extra {
* PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
* usage.
*/
-#define PERF_EVENT_FLAG_ARCH 0x0000ffff
+#define PERF_EVENT_FLAG_ARCH 0x000fffff
#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000
+static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
+
/**
* struct hw_perf_event - performance event hardware details:
*/
@@ -178,7 +182,7 @@ struct hw_perf_event {
* creation and event initialization.
*/
struct arch_hw_breakpoint info;
- struct list_head bp_list;
+ struct rhlist_head bp_list;
};
#endif
struct { /* amd_iommu */
@@ -262,6 +266,7 @@ struct hw_perf_event {
};
struct perf_event;
+struct perf_event_pmu_context;
/*
* Common implementation detail of pmu::{start,commit,cancel}_txn
@@ -304,7 +309,7 @@ struct pmu {
int capabilities;
int __percpu *pmu_disable_count;
- struct perf_cpu_context __percpu *pmu_cpu_context;
+ struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
@@ -439,7 +444,7 @@ struct pmu {
/*
* context-switches callback
*/
- void (*sched_task) (struct perf_event_context *ctx,
+ void (*sched_task) (struct perf_event_pmu_context *pmu_ctx,
bool sched_in);
/*
@@ -453,8 +458,8 @@ struct pmu {
* implementation and Perf core context switch handling callbacks for usage
* examples.
*/
- void (*swap_task_ctx) (struct perf_event_context *prev,
- struct perf_event_context *next);
+ void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc,
+ struct perf_event_pmu_context *next_epc);
/* optional */
/*
@@ -518,9 +523,10 @@ struct pmu {
/* optional */
/*
- * Filter events for PMU-specific reasons.
+ * Skip programming this PMU on the given CPU. Typically needed for
+ * big.LITTLE things.
*/
- int (*filter_match) (struct perf_event *event); /* optional */
+ bool (*filter) (struct pmu *pmu, int cpu); /* optional */
/*
* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
@@ -631,7 +637,23 @@ struct pmu_event_list {
struct list_head list;
};
+/*
+ * event->sibling_list is modified while holding both ctx->lock and ctx->mutex;
+ * as such, iteration must hold either lock. However, since ctx->lock is an IRQ
+ * safe lock, and is only held by the CPU doing the modification, having IRQs
+ * disabled is sufficient since it will hold off the IPIs.
+ */
+#ifdef CONFIG_PROVE_LOCKING
+#define lockdep_assert_event_ctx(event) \
+ WARN_ON_ONCE(__lockdep_enabled && \
+ (this_cpu_read(hardirqs_enabled) && \
+ lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
+#else
+#define lockdep_assert_event_ctx(event)
+#endif
+
#define for_each_sibling_event(sibling, event) \
+ lockdep_assert_event_ctx(event); \
if ((event)->group_leader == (event)) \
list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
@@ -675,6 +697,11 @@ struct perf_event {
int group_caps;
struct perf_event *group_leader;
+ /*
+ * event->pmu will always point to the pmu to which this event belongs.
+ * Whereas event->pmu_ctx->pmu may point to a different pmu when a group
+ * of events from different pmus is created.
+ */
struct pmu *pmu;
void *pmu_private;
@@ -700,6 +727,12 @@ struct perf_event {
struct hw_perf_event hw;
struct perf_event_context *ctx;
+ /*
+ * event->pmu_ctx points to the perf_event_pmu_context to which the event
+ * is added. This pmu_ctx can belong to another pmu for a sw event when
+ * that sw event is part of a group which also contains non-sw events.
+ */
+ struct perf_event_pmu_context *pmu_ctx;
atomic_long_t refcount;
/*
@@ -736,11 +769,14 @@ struct perf_event {
struct fasync_struct *fasync;
/* delayed work for NMIs and such */
- int pending_wakeup;
- int pending_kill;
- int pending_disable;
+ unsigned int pending_wakeup;
+ unsigned int pending_kill;
+ unsigned int pending_disable;
+ unsigned int pending_sigtrap;
unsigned long pending_addr; /* SIGTRAP */
- struct irq_work pending;
+ struct irq_work pending_irq;
+ struct callback_head pending_task;
+ unsigned int pending_work;
atomic_t event_limit;
@@ -789,19 +825,69 @@ struct perf_event {
#endif /* CONFIG_PERF_EVENTS */
};
+/*
+ * ,-----------------------[1:n]----------------------.
+ * V V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
+ * ^ ^ | |
+ * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-'
+ *
+ *
+ * struct perf_event_pmu_context lifetime is refcount based and RCU freed
+ * (similar to perf_event_context). Locking is as if it were a member of
+ * perf_event_context; specifically:
+ *
+ * modification, both: ctx->mutex && ctx->lock
+ * reading, either: ctx->mutex || ctx->lock
+ *
+ * There is one exception to this; namely put_pmu_ctx() isn't always called
+ * with ctx->mutex held; this means that as long as we can guarantee the epc
+ * has events the above rules hold.
+ *
+ * Specifically, sys_perf_event_open()'s group_leader case depends on
+ * ctx->mutex pinning the configuration. Since we hold a reference on
+ * group_leader (through the filedesc) it can't go away, therefore its
+ * associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ */
+struct perf_event_pmu_context {
+ struct pmu *pmu;
+ struct perf_event_context *ctx;
+
+ struct list_head pmu_ctx_entry;
+
+ struct list_head pinned_active;
+ struct list_head flexible_active;
+
+ /* Used to avoid freeing per-cpu perf_event_pmu_context */
+ unsigned int embedded : 1;
+
+ unsigned int nr_events;
+
+ atomic_t refcount; /* event <-> epc */
+ struct rcu_head rcu_head;
+
+ void *task_ctx_data; /* pmu specific data */
+ /*
+ * Set when one or more (plausibly active) event can't be scheduled
+ * due to pmu overcommit or pmu constraints, except tolerant to
+ * events not necessary to be active due to scheduling constraints,
+ * such as cgroups.
+ */
+ int rotate_necessary;
+};
struct perf_event_groups {
struct rb_root tree;
u64 index;
};
+
/**
* struct perf_event_context - event context structure
*
* Used as a container for task events and CPU events as well:
*/
struct perf_event_context {
- struct pmu *pmu;
/*
* Protect the states of the events in the list,
* nr_active, and the list:
@@ -814,27 +900,21 @@ struct perf_event_context {
*/
struct mutex mutex;
- struct list_head active_ctx_list;
+ struct list_head pmu_ctx_list;
struct perf_event_groups pinned_groups;
struct perf_event_groups flexible_groups;
struct list_head event_list;
- struct list_head pinned_active;
- struct list_head flexible_active;
-
int nr_events;
- int nr_active;
int nr_user;
int is_active;
+
+ int nr_task_data;
int nr_stat;
int nr_freq;
int rotate_disable;
- /*
- * Set when nr_events != nr_active, except tolerant to events not
- * necessary to be active due to scheduling constraints, such as cgroups.
- */
- int rotate_necessary;
- refcount_t refcount;
+
+ refcount_t refcount; /* event <-> ctx */
struct task_struct *task;
/*
@@ -855,8 +935,15 @@ struct perf_event_context {
#ifdef CONFIG_CGROUP_PERF
int nr_cgroups; /* cgroup evts */
#endif
- void *task_ctx_data; /* pmu specific data */
struct rcu_head rcu_head;
+
+ /*
+ * Sum (event->pending_sigtrap + event->pending_work)
+ *
+ * The SIGTRAP is targeted at ctx->task, as such ctx->task won't be
+ * changed until the signal is delivered.
+ */
+ local_t nr_pending;
};
/*
@@ -865,12 +952,13 @@ struct perf_event_context {
*/
#define PERF_NR_CONTEXTS 4
-/**
- * struct perf_cpu_context - per cpu event context structure
- */
-struct perf_cpu_context {
- struct perf_event_context ctx;
- struct perf_event_context *task_ctx;
+struct perf_cpu_pmu_context {
+ struct perf_event_pmu_context epc;
+ struct perf_event_pmu_context *task_epc;
+
+ struct list_head sched_cb_entry;
+ int sched_cb_usage;
+
int active_oncpu;
int exclusive;
@@ -878,16 +966,20 @@ struct perf_cpu_context {
struct hrtimer hrtimer;
ktime_t hrtimer_interval;
unsigned int hrtimer_active;
+};
+
+/**
+ * struct perf_cpu_context - per cpu event context structure
+ */
+struct perf_cpu_context {
+ struct perf_event_context ctx;
+ struct perf_event_context *task_ctx;
+ int online;
#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp;
- struct list_head cgrp_cpuctx_entry;
#endif
- struct list_head sched_cb_entry;
- int sched_cb_usage;
-
- int online;
/*
* Per-CPU storage for iterators used in visit_groups_merge. The default
* storage is of size 2 to hold the CPU and any CPU event iterators.
@@ -951,6 +1043,8 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
#ifdef CONFIG_PERF_EVENTS
+extern struct perf_event_context *perf_cpu_task_ctx(void);
+
extern void *perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
@@ -1007,18 +1101,20 @@ struct perf_sample_data {
* Fields set by perf_sample_data_init(), group so as to
* minimize the cachelines touched.
*/
- u64 addr;
- struct perf_raw_record *raw;
- struct perf_branch_stack *br_stack;
+ u64 sample_flags;
u64 period;
- union perf_sample_weight weight;
- u64 txn;
- union perf_mem_data_src data_src;
/*
* The other fields, optionally {set,used} by
* perf_{prepare,output}_sample().
*/
+ struct perf_branch_stack *br_stack;
+ union perf_sample_weight weight;
+ union perf_mem_data_src data_src;
+ u64 txn;
+ u64 addr;
+ struct perf_raw_record *raw;
+
u64 type;
u64 ip;
struct {
@@ -1056,13 +1152,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
u64 addr, u64 period)
{
/* remaining struct members initialized in perf_prepare_sample() */
- data->addr = addr;
- data->raw = NULL;
- data->br_stack = NULL;
+ data->sample_flags = PERF_SAMPLE_PERIOD;
data->period = period;
- data->weight.full = 0;
- data->data_src.val = PERF_MEM_NA;
- data->txn = 0;
+
+ if (addr) {
+ data->addr = addr;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
}
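A sketch of the PMU-driver pattern this change expects: optional sample fields are only considered valid once the matching bit is set in sample_flags. The cpuc/lbr_stack names below are placeholders, not from the patch.

	perf_sample_data_init(&data, 0, event->hw.last_period);

	if (has_branch_stack(event)) {
		data.br_stack = &cpuc->lbr_stack;	/* driver-local LBR snapshot */
		data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
	}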
/*
@@ -1078,6 +1174,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *b
br->abort = 0;
br->cycles = 0;
br->type = 0;
+ br->spec = PERF_BR_SPEC_NA;
br->reserved = 0;
}
@@ -1153,7 +1250,7 @@ static inline int is_software_event(struct perf_event *event)
*/
static inline int in_software_context(struct perf_event *event)
{
- return event->ctx->pmu->task_ctx_nr == perf_sw_context;
+ return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
}
static inline int is_exclusive_pmu(struct pmu *pmu)
@@ -1684,4 +1781,30 @@ static inline void perf_lopwr_cb(bool mode)
}
#endif
+#ifdef CONFIG_PERF_EVENTS
+static inline bool branch_sample_no_flags(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
+}
+
+static inline bool branch_sample_no_cycles(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
+}
+
+static inline bool branch_sample_type(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
+}
+
+static inline bool branch_sample_hw_index(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
+static inline bool branch_sample_priv(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
+}
+#endif /* CONFIG_PERF_EVENTS */
#endif /* _LINUX_PERF_EVENT_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 014ee8f0fbaa..dfabd549d2e7 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
}
+#ifndef pmd_young
+static inline int pmd_young(pmd_t pmd)
+{
+ return 0;
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@@ -213,7 +220,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
#endif
#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp)
@@ -234,7 +241,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
BUILD_BUG();
return 0;
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@@ -260,6 +267,30 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef arch_has_hw_nonleaf_pmd_young
+/*
+ * Return whether the accessed bit in non-leaf PMD entries is supported on the
+ * local CPU.
+ */
+static inline bool arch_has_hw_nonleaf_pmd_young(void)
+{
+ return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
+}
+#endif
+
+#ifndef arch_has_hw_pte_young
+/*
+ * Return whether the accessed bit is supported on the local CPU.
+ *
+ * This stub assumes accessing through an old PTE triggers a page fault.
+ * Architectures that automatically set the access bit should override this stub.
+ */
+static inline bool arch_has_hw_pte_young(void)
+{
+ return false;
+}
+#endif
+
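A sketch of how a caller might use the probe; whether to treat a mapping as referenced when the stub returns false is a policy choice, shown here purely for illustration.

static bool my_pte_was_accessed(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep)
{
	/* no hardware-set accessed bit: an old PTE would have faulted anyway */
	if (!arch_has_hw_pte_young())
		return true;

	return ptep_test_and_clear_young(vma, addr, ptep);
}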
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address,
@@ -394,9 +425,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pte_t *ptep,
int full)
{
- pte_t pte;
- pte = ptep_get_and_clear(mm, address, ptep);
- return pte;
+ return ptep_get_and_clear(mm, address, ptep);
}
#endif
@@ -472,30 +501,6 @@ static inline pte_t pte_sw_mkyoung(pte_t pte)
#define pte_sw_mkyoung pte_sw_mkyoung
#endif
-#ifndef pte_savedwrite
-#define pte_savedwrite pte_write
-#endif
-
-#ifndef pte_mk_savedwrite
-#define pte_mk_savedwrite pte_mkwrite
-#endif
-
-#ifndef pte_clear_savedwrite
-#define pte_clear_savedwrite pte_wrprotect
-#endif
-
-#ifndef pmd_savedwrite
-#define pmd_savedwrite pmd_write
-#endif
-
-#ifndef pmd_mk_savedwrite
-#define pmd_mk_savedwrite pmd_mkwrite
-#endif
-
-#ifndef pmd_clear_savedwrite
-#define pmd_clear_savedwrite pmd_wrprotect
-#endif
-
#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
@@ -1276,8 +1281,7 @@ static inline int pgd_devmap(pgd_t pgd)
#endif
#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
- (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
- !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static inline int pud_trans_huge(pud_t pud)
{
return 0;
@@ -1598,11 +1602,7 @@ typedef unsigned int pgtbl_mod_mask;
#endif
#ifndef has_transparent_hugepage
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define has_transparent_hugepage() 1
-#else
-#define has_transparent_hugepage() 0
-#endif
+#define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
#endif
/*
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 87638c55d844..71eeb4e3b1fd 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -115,6 +115,8 @@ extern const int phy_10gbit_features_array[1];
* @PHY_INTERFACE_MODE_25GBASER: 25G BaseR
* @PHY_INTERFACE_MODE_USXGMII: Universal Serial 10GE MII
* @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN
+ * @PHY_INTERFACE_MODE_QUSGMII: Quad Universal SGMII
+ * @PHY_INTERFACE_MODE_1000BASEKX: 1000Base-KX - with Clause 73 AN
* @PHY_INTERFACE_MODE_MAX: Book keeping
*
* Describes the interface between the MAC and PHY.
@@ -152,6 +154,8 @@ typedef enum {
PHY_INTERFACE_MODE_USXGMII,
/* 10GBASE-KR - with Clause 73 AN */
PHY_INTERFACE_MODE_10GKR,
+ PHY_INTERFACE_MODE_QUSGMII,
+ PHY_INTERFACE_MODE_1000BASEKX,
PHY_INTERFACE_MODE_MAX,
} phy_interface_t;
@@ -249,6 +253,8 @@ static inline const char *phy_modes(phy_interface_t interface)
return "trgmii";
case PHY_INTERFACE_MODE_1000BASEX:
return "1000base-x";
+ case PHY_INTERFACE_MODE_1000BASEKX:
+ return "1000base-kx";
case PHY_INTERFACE_MODE_2500BASEX:
return "2500base-x";
case PHY_INTERFACE_MODE_5GBASER:
@@ -267,12 +273,13 @@ static inline const char *phy_modes(phy_interface_t interface)
return "10gbase-kr";
case PHY_INTERFACE_MODE_100BASEX:
return "100base-x";
+ case PHY_INTERFACE_MODE_QUSGMII:
+ return "qusgmii";
default:
return "unknown";
}
}
-
#define PHY_INIT_TIMEOUT 100000
#define PHY_FORCE_TIMEOUT 10
@@ -522,6 +529,8 @@ struct macsec_ops;
*
* @mdio: MDIO bus this PHY is on
* @drv: Pointer to the driver for this PHY instance
+ * @devlink: Device link between the phy dev and the mac dev, created when
+ * the external phy used by the current mac interface is managed by
+ * another mac interface.
* @phy_id: UID for this device found during discovery
* @c45_ids: 802.3-c45 Device Identifiers if is_c45.
* @is_c45: Set to true if this PHY uses clause 45 addressing.
@@ -564,8 +573,10 @@ struct macsec_ops;
* @advertising: Currently advertised linkmodes
* @adv_old: Saved advertised while power saving for WoL
* @lp_advertising: Current link partner advertised linkmodes
+ * @host_interfaces: PHY interface modes supported by host
* @eee_broken_modes: Energy efficient ethernet modes which should be prohibited
* @autoneg: Flag autoneg being used
+ * @rate_matching: Current rate matching mode
* @link: Current link state
* @autoneg_complete: Flag auto negotiation of the link has completed
* @mdix: Current crossover
@@ -588,8 +599,10 @@ struct macsec_ops;
* @master_slave_get: Current master/slave advertisement
* @master_slave_state: Current master/slave configuration
* @mii_ts: Pointer to time stamper callbacks
+ * @psec: Pointer to Power Sourcing Equipment control struct
* @lock: Mutex for serialization access to PHY
* @state_queue: Work queue for state machine
+ * @link_down_events: Number of times link was lost
* @shared: Pointer to private data shared by phys in one package
* @priv: Pointer to driver private data
*
@@ -607,6 +620,8 @@ struct phy_device {
/* And management functions */
struct phy_driver *drv;
+ struct device_link *devlink;
+
u32 phy_id;
struct phy_c45_device_ids c45_ids;
@@ -633,6 +648,8 @@ struct phy_device {
unsigned irq_suspended:1;
unsigned irq_rerun:1;
+ int rate_matching;
+
enum phy_state state;
u32 dev_flags;
@@ -660,6 +677,9 @@ struct phy_device {
/* used with phy_speed_down */
__ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old);
+ /* Host supported PHY interface types. Should be ignored if empty. */
+ DECLARE_PHY_INTERFACE_MASK(host_interfaces);
+
/* Energy efficient ethernet modes which should be prohibited */
u32 eee_broken_modes;
@@ -701,12 +721,15 @@ struct phy_device {
struct phylink *phylink;
struct net_device *attached_dev;
struct mii_timestamper *mii_ts;
+ struct pse_control *psec;
u8 mdix;
u8 mdix_ctrl;
int pma_extable;
+ unsigned int link_down_events;
+
void (*phy_link_change)(struct phy_device *phydev, bool up);
void (*adjust_link)(struct net_device *dev);
@@ -797,6 +820,21 @@ struct phy_driver {
*/
int (*get_features)(struct phy_device *phydev);
+ /**
+ * @get_rate_matching: Get the supported type of rate matching for a
+ * particular phy interface. This is used by phy consumers to determine
+ * whether to advertise lower-speed modes for that interface. It is
+ * assumed that if a rate matching mode is supported on an interface,
+ * then that interface's rate can be adapted to all slower link speeds
+ * supported by the phy. If iface is %PHY_INTERFACE_MODE_NA, and the phy
+ * supports any kind of rate matching for any interface, then it must
+ * return that rate matching mode (preferring %RATE_MATCH_PAUSE to
+ * %RATE_MATCH_CRS). If the interface is not supported, this should
+ * return %RATE_MATCH_NONE.
+ */
+ int (*get_rate_matching)(struct phy_device *phydev,
+ phy_interface_t iface);
+
/* PHY Power Management */
/** @suspend: Suspend the hardware, saving state if needed */
int (*suspend)(struct phy_device *phydev);
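A hypothetical driver implementation of the @get_rate_matching callback documented above; the interface choice and the capabilities it reports are invented for illustration: a PHY that can rate-adapt with pause frames only when operating as 10GBASE-R.

static int my_phy_get_rate_matching(struct phy_device *phydev,
				    phy_interface_t iface)
{
	if (iface == PHY_INTERFACE_MODE_10GBASER ||
	    iface == PHY_INTERFACE_MODE_NA)
		return RATE_MATCH_PAUSE;

	return RATE_MATCH_NONE;
}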
@@ -963,6 +1001,9 @@ struct phy_fixup {
const char *phy_speed_to_str(int speed);
const char *phy_duplex_to_str(unsigned int duplex);
+const char *phy_rate_matching_to_str(int rate_matching);
+
+int phy_interface_num_ports(phy_interface_t interface);
/* A structure for mapping a particular speed and duplex
* combination to a particular SUPPORTED and ADVERTISED value
@@ -1677,6 +1718,8 @@ int phy_disable_interrupts(struct phy_device *phydev);
void phy_request_interrupt(struct phy_device *phydev);
void phy_free_interrupt(struct phy_device *phydev);
void phy_print_status(struct phy_device *phydev);
+int phy_get_rate_matching(struct phy_device *phydev,
+ phy_interface_t iface);
void phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
void phy_advertise_supported(struct phy_device *phydev);
diff --git a/include/linux/phy/pcie.h b/include/linux/phy/pcie.h
new file mode 100644
index 000000000000..e7ac81764576
--- /dev/null
+++ b/include/linux/phy/pcie.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 Rockchip Electronics Co., Ltd.
+ */
+#ifndef __PHY_PCIE_H
+#define __PHY_PCIE_H
+
+#define PHY_MODE_PCIE_RC 20
+#define PHY_MODE_PCIE_EP 21
+#define PHY_MODE_PCIE_BIFURCATION 22
+
+#endif
diff --git a/include/linux/phy/phy-mipi-dphy.h b/include/linux/phy/phy-mipi-dphy.h
index a877ffee845d..1ac128d78dfe 100644
--- a/include/linux/phy/phy-mipi-dphy.h
+++ b/include/linux/phy/phy-mipi-dphy.h
@@ -279,6 +279,9 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock,
unsigned int bpp,
unsigned int lanes,
struct phy_configure_opts_mipi_dphy *cfg);
+int phy_mipi_dphy_get_default_config_for_hsclk(unsigned long long hs_clk_rate,
+ unsigned int lanes,
+ struct phy_configure_opts_mipi_dphy *cfg);
int phy_mipi_dphy_config_validate(struct phy_configure_opts_mipi_dphy *cfg);
#endif /* __PHY_MIPI_DPHY_H_ */
diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h
index 3a35e74cdc61..70998e6dd6fd 100644
--- a/include/linux/phy/tegra/xusb.h
+++ b/include/linux/phy/tegra/xusb.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
*/
#ifndef PHY_TEGRA_XUSB_H
@@ -21,6 +21,8 @@ int tegra_xusb_padctl_usb3_set_lfps_detect(struct tegra_xusb_padctl *padctl,
unsigned int port, bool enable);
int tegra_xusb_padctl_set_vbus_override(struct tegra_xusb_padctl *padctl,
bool val);
+void tegra_phy_xusb_utmi_pad_power_on(struct phy *phy);
+void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy);
int tegra_phy_xusb_utmi_port_reset(struct phy *phy);
int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl,
unsigned int port);
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 6d06896fc20d..c492c26202b5 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -21,6 +21,35 @@ enum {
MLO_AN_FIXED, /* Fixed-link mode */
MLO_AN_INBAND, /* In-band protocol */
+ /* MAC_SYM_PAUSE and MAC_ASYM_PAUSE are used when configuring our
+ * autonegotiation advertisement. They correspond to the PAUSE and
+ * ASM_DIR bits defined by 802.3, respectively.
+ *
+ * The following table lists the values of tx_pause and rx_pause which
+ * might be requested in mac_link_up. The exact values depend on either
+ * the results of autonegotiation (if MLO_PAUSE_AN is set) or user
+ * configuration (if MLO_PAUSE_AN is not set).
+ *
+ * MAC_SYM_PAUSE MAC_ASYM_PAUSE MLO_PAUSE_AN tx_pause/rx_pause
+ * ============= ============== ============ ==================
+ * 0 0 0 0/0
+ * 0 0 1 0/0
+ * 0 1 0 0/0, 0/1, 1/0, 1/1
+ * 0 1 1 0/0, 1/0
+ * 1 0 0 0/0, 1/1
+ * 1 0 1 0/0, 1/1
+ * 1 1 0 0/0, 0/1, 1/0, 1/1
+ * 1 1 1 0/0, 0/1, 1/1
+ *
+ * If you set MAC_ASYM_PAUSE, the user may request any combination of
+ * tx_pause and rx_pause. You do not have to support these
+ * combinations.
+ *
+ * However, you should support combinations of tx_pause and rx_pause
+ * which might be the result of autonegotiation. For example, don't set
+ * MAC_SYM_PAUSE unless your device can support tx_pause and rx_pause
+ * at the same time.
+ */
MAC_SYM_PAUSE = BIT(0),
MAC_ASYM_PAUSE = BIT(1),
MAC_10HD = BIT(2),
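A sketch of how a MAC driver acts on the pause table above; the setup helper and the link speeds chosen are illustrative only, not from the patch.

static void my_mac_setup(struct phylink_config *config)
{
	/* can generate and honour pause frames independently */
	config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
				   MAC_10 | MAC_100 | MAC_1000FD;
}

static void my_mac_link_up(struct phylink_config *config,
			   struct phy_device *phy, unsigned int mode,
			   phy_interface_t interface, int speed, int duplex,
			   bool tx_pause, bool rx_pause)
{
	/* tx_pause/rx_pause already reflect one of the rows in the table */
	/* ... program MAC flow control from tx_pause and rx_pause ... */
}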
@@ -59,6 +88,10 @@ static inline bool phylink_autoneg_inband(unsigned int mode)
* @speed: link speed, one of the SPEED_* constants.
* @duplex: link duplex mode, one of DUPLEX_* constants.
* @pause: link pause state, described by MLO_PAUSE_* constants.
+ * @rate_matching: rate matching being performed, one of the RATE_MATCH_*
+ * constants. If rate matching is taking place, then the speed/duplex of
+ * the medium link mode (@speed and @duplex) and the speed/duplex of the phy
+ * interface mode (@interface) are different.
* @link: true if the link is up.
* @an_enabled: true if autonegotiation is enabled/desired.
* @an_complete: true if autonegotiation has completed.
@@ -70,6 +103,7 @@ struct phylink_link_state {
int speed;
int duplex;
int pause;
+ int rate_matching;
unsigned int link:1;
unsigned int an_enabled:1;
unsigned int an_complete:1;
@@ -88,6 +122,7 @@ enum phylink_op_type {
* (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls")
* @poll_fixed_state: if true, starts link_poll,
* if MAC link is at %MLO_AN_FIXED mode.
+ * @mac_managed_pm: if true, indicates the MAC driver is responsible for PHY PM.
* @ovr_an_inband: if true, override PCS to MLO_AN_INBAND
* @get_fixed_state: callback to execute to determine the fixed link state,
* if MAC link is at %MLO_AN_FIXED mode.
@@ -100,6 +135,7 @@ struct phylink_config {
enum phylink_op_type type;
bool legacy_pre_march2020;
bool poll_fixed_state;
+ bool mac_managed_pm;
bool ovr_an_inband;
void (*get_fixed_state)(struct phylink_config *config,
struct phylink_link_state *state);
@@ -171,6 +207,11 @@ struct phylink_mac_ops {
*
* If the @state->interface mode is not supported, then the @supported
* mask must be cleared.
+ *
+ * This member is optional; if not set, the generic validator will be
+ * used, making use of @config->mac_capabilities and
+ * @config->supported_interfaces to determine which link modes are
+ * supported.
*/
void validate(struct phylink_config *config, unsigned long *supported,
struct phylink_link_state *state);
@@ -518,8 +559,13 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
phy_interface_t interface, int speed, int duplex);
#endif
-void phylink_get_linkmodes(unsigned long *linkmodes, phy_interface_t interface,
- unsigned long mac_capabilities);
+void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps);
+unsigned long phylink_get_capabilities(phy_interface_t interface,
+ unsigned long mac_capabilities,
+ int rate_matching);
+void phylink_validate_mask_caps(unsigned long *supported,
+ struct phylink_link_state *state,
+ unsigned long caps);
void phylink_generic_validate(struct phylink_config *config,
unsigned long *supported,
struct phylink_link_state *state);
@@ -575,6 +621,30 @@ int phylink_speed_up(struct phylink *pl);
void phylink_set_port_modes(unsigned long *bits);
+/**
+ * phylink_get_link_timer_ns - return the PCS link timer value
+ * @interface: link &typedef phy_interface_t mode
+ *
+ * Return the PCS link timer setting in nanoseconds for the PHY @interface
+ * mode, or -EINVAL if not appropriate.
+ */
+static inline int phylink_get_link_timer_ns(phy_interface_t interface)
+{
+ switch (interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_USXGMII:
+ return 1600000;
+
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ return 10000000;
+
+ default:
+ return -EINVAL;
+ }
+}
+
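A sketch of a PCS driver consuming the helper above; the register and its 8ns granularity are invented for illustration.

static void my_pcs_set_link_timer(void __iomem *base, phy_interface_t interface)
{
	int ns = phylink_get_link_timer_ns(interface);

	if (ns > 0)
		writel(ns / 8, base + MY_PCS_LINK_TIMER);	/* hypothetical register */
}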
void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state,
u16 bmsr, u16 lpa);
void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs,
diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 019fecd75d0c..4729d54e8995 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -12,14 +12,15 @@
#define __LINUX_PINCTRL_CONSUMER_H
#include <linux/err.h>
-#include <linux/list.h>
-#include <linux/seq_file.h>
+#include <linux/types.h>
+
#include <linux/pinctrl/pinctrl-state.h>
+struct device;
+
/* This struct is private to the core and should be regarded as a cookie */
struct pinctrl;
struct pinctrl_state;
-struct device;
#ifdef CONFIG_PINCTRL
@@ -33,9 +34,8 @@ extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config);
extern struct pinctrl * __must_check pinctrl_get(struct device *dev);
extern void pinctrl_put(struct pinctrl *p);
-extern struct pinctrl_state * __must_check pinctrl_lookup_state(
- struct pinctrl *p,
- const char *name);
+extern struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p,
+ const char *name);
extern int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s);
extern struct pinctrl * __must_check devm_pinctrl_get(struct device *dev);
@@ -101,9 +101,8 @@ static inline void pinctrl_put(struct pinctrl *p)
{
}
-static inline struct pinctrl_state * __must_check pinctrl_lookup_state(
- struct pinctrl *p,
- const char *name)
+static inline struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p,
+ const char *name)
{
return NULL;
}
@@ -145,8 +144,8 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev)
#endif /* CONFIG_PINCTRL */
-static inline struct pinctrl * __must_check pinctrl_get_select(
- struct device *dev, const char *name)
+static inline struct pinctrl * __must_check pinctrl_get_select(struct device *dev,
+ const char *name)
{
struct pinctrl *p;
struct pinctrl_state *s;
@@ -171,14 +170,13 @@ static inline struct pinctrl * __must_check pinctrl_get_select(
return p;
}
-static inline struct pinctrl * __must_check pinctrl_get_select_default(
- struct device *dev)
+static inline struct pinctrl * __must_check pinctrl_get_select_default(struct device *dev)
{
return pinctrl_get_select(dev, PINCTRL_STATE_DEFAULT);
}
-static inline struct pinctrl * __must_check devm_pinctrl_get_select(
- struct device *dev, const char *name)
+static inline struct pinctrl * __must_check devm_pinctrl_get_select(struct device *dev,
+ const char *name)
{
struct pinctrl *p;
struct pinctrl_state *s;
@@ -203,8 +201,7 @@ static inline struct pinctrl * __must_check devm_pinctrl_get_select(
return p;
}
-static inline struct pinctrl * __must_check devm_pinctrl_get_select_default(
- struct device *dev)
+static inline struct pinctrl * __must_check devm_pinctrl_get_select_default(struct device *dev)
{
return devm_pinctrl_get_select(dev, PINCTRL_STATE_DEFAULT);
}
diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h
index a48ff69acddd..9e8b559e1253 100644
--- a/include/linux/pinctrl/devinfo.h
+++ b/include/linux/pinctrl/devinfo.h
@@ -14,11 +14,15 @@
#ifndef PINCTRL_DEVINFO_H
#define PINCTRL_DEVINFO_H
+struct device;
+
#ifdef CONFIG_PINCTRL
/* The device core acts as a consumer toward pinctrl */
#include <linux/pinctrl/consumer.h>
+struct pinctrl;
+
/**
* struct dev_pin_info - pin state container for devices
* @p: pinctrl handle for the containing device
@@ -42,8 +46,6 @@ extern int pinctrl_init_done(struct device *dev);
#else
-struct device;
-
/* Stubs if we're not using pinctrl */
static inline int pinctrl_bind_pins(struct device *dev)
diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index e987dc9fd2af..0639b36f43c5 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -11,7 +11,7 @@
#ifndef __LINUX_PINCTRL_MACHINE_H
#define __LINUX_PINCTRL_MACHINE_H
-#include <linux/bug.h>
+#include <linux/kernel.h> /* ARRAY_SIZE() */
#include <linux/pinctrl/pinctrl-state.h>
@@ -149,16 +149,18 @@ struct pinctrl_map {
#define PIN_MAP_CONFIGS_GROUP_HOG_DEFAULT(dev, grp, cfgs) \
PIN_MAP_CONFIGS_GROUP(dev, PINCTRL_STATE_DEFAULT, dev, grp, cfgs)
+struct pinctrl_map;
+
#ifdef CONFIG_PINCTRL
extern int pinctrl_register_mappings(const struct pinctrl_map *map,
- unsigned num_maps);
+ unsigned num_maps);
extern void pinctrl_unregister_mappings(const struct pinctrl_map *map);
extern void pinctrl_provide_dummies(void);
#else
static inline int pinctrl_register_mappings(const struct pinctrl_map *map,
- unsigned num_maps)
+ unsigned num_maps)
{
return 0;
}
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 2422211d6a5a..d74b7a4ea154 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -11,9 +11,12 @@
#ifndef __LINUX_PINCTRL_PINCONF_GENERIC_H
#define __LINUX_PINCTRL_PINCONF_GENERIC_H
-#include <linux/device.h>
+#include <linux/types.h>
+
#include <linux/pinctrl/machine.h>
+struct device_node;
+
struct pinctrl_dev;
struct pinctrl_map;
@@ -35,7 +38,8 @@ struct pinctrl_map;
* impedance.
* @PIN_CONFIG_BIAS_PULL_DOWN: the pin will be pulled down (usually with high
* impedance to GROUND). If the argument is != 0 pull-down is enabled,
- * if it is 0, pull-down is total, i.e. the pin is connected to GROUND.
+ * the value is interpreted by the driver and can be custom or an SI unit
+ * such as Ohms.
* @PIN_CONFIG_BIAS_PULL_PIN_DEFAULT: the pin will be pulled up or down based
* on embedded knowledge of the controller hardware, like current mux
* function. The pull direction and possibly strength too will normally
@@ -46,7 +50,8 @@ struct pinctrl_map;
* @PIN_CONFIG_BIAS_DISABLE.
* @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high
* impedance to VDD). If the argument is != 0 pull-up is enabled,
- * if it is 0, pull-up is total, i.e. the pin is connected to VDD.
+ * the value is interpreted by the driver and can be custom or an SI unit
+ * such as Ohms.
* @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open
* collector) which means it is usually wired with other output ports
* which are then pulled up with an external resistor. Setting this
@@ -196,25 +201,25 @@ int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev,
void pinconf_generic_dt_free_map(struct pinctrl_dev *pctldev,
struct pinctrl_map *map, unsigned num_maps);
-static inline int pinconf_generic_dt_node_to_map_group(
- struct pinctrl_dev *pctldev, struct device_node *np_config,
- struct pinctrl_map **map, unsigned *num_maps)
+static inline int pinconf_generic_dt_node_to_map_group(struct pinctrl_dev *pctldev,
+ struct device_node *np_config, struct pinctrl_map **map,
+ unsigned *num_maps)
{
return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps,
PIN_MAP_TYPE_CONFIGS_GROUP);
}
-static inline int pinconf_generic_dt_node_to_map_pin(
- struct pinctrl_dev *pctldev, struct device_node *np_config,
- struct pinctrl_map **map, unsigned *num_maps)
+static inline int pinconf_generic_dt_node_to_map_pin(struct pinctrl_dev *pctldev,
+ struct device_node *np_config, struct pinctrl_map **map,
+ unsigned *num_maps)
{
return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps,
PIN_MAP_TYPE_CONFIGS_PIN);
}
-static inline int pinconf_generic_dt_node_to_map_all(
- struct pinctrl_dev *pctldev, struct device_node *np_config,
- struct pinctrl_map **map, unsigned *num_maps)
+static inline int pinconf_generic_dt_node_to_map_all(struct pinctrl_dev *pctldev,
+ struct device_node *np_config, struct pinctrl_map **map,
+ unsigned *num_maps)
{
/*
* passing the type as PIN_MAP_TYPE_INVALID causes the underlying parser
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 487117ccb1bc..a0d39b303431 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -11,20 +11,20 @@
#ifndef __LINUX_PINCTRL_PINCTRL_H
#define __LINUX_PINCTRL_PINCTRL_H
-#include <linux/radix-tree.h>
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/pinctrl/pinctrl-state.h>
-#include <linux/pinctrl/devinfo.h>
+#include <linux/types.h>
struct device;
+struct device_node;
+struct gpio_chip;
+struct module;
+struct seq_file;
+
+struct pin_config_item;
+struct pinconf_generic_params;
+struct pinconf_ops;
struct pinctrl_dev;
struct pinctrl_map;
struct pinmux_ops;
-struct pinconf_ops;
-struct pin_config_item;
-struct gpio_chip;
-struct device_node;
/**
* struct pingroup - provides information on pingroup
@@ -40,7 +40,7 @@ struct pingroup {
/* Convenience macro to define a single named or anonymous pingroup */
#define PINCTRL_PINGROUP(_name, _pins, _npins) \
-(struct pingroup){ \
+(struct pingroup) { \
.name = _name, \
.pins = _pins, \
.npins = _npins, \
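
As an illustration of the convenience macro this whitespace tweak touches; the pin numbers and group name below are invented:

static const unsigned int foo_i2c0_pins[] = { 4, 5 };

static const struct pingroup foo_groups[] = {
	PINCTRL_PINGROUP("i2c0_grp", foo_i2c0_pins, ARRAY_SIZE(foo_i2c0_pins)),
};
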
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index 9a647fa5c8f1..a7e370965c53 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -11,11 +11,10 @@
#ifndef __LINUX_PINCTRL_PINMUX_H
#define __LINUX_PINCTRL_PINMUX_H
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/pinctrl/pinctrl.h>
+#include <linux/types.h>
struct pinctrl_dev;
+struct pinctrl_gpio_range;
/**
* struct pinmux_ops - pinmux operations, to be implemented by pin controller
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
deleted file mode 100644
index f9c5ac80d59b..000000000000
--- a/include/linux/pktcdvd.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
- * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
- *
- * May be copied or modified under the terms of the GNU General Public
- * License. See linux/COPYING for more information.
- *
- * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and
- * DVD-RW devices.
- *
- */
-#ifndef __PKTCDVD_H
-#define __PKTCDVD_H
-
-#include <linux/blkdev.h>
-#include <linux/completion.h>
-#include <linux/cdrom.h>
-#include <linux/kobject.h>
-#include <linux/sysfs.h>
-#include <linux/mempool.h>
-#include <uapi/linux/pktcdvd.h>
-
-/* default bio write queue congestion marks */
-#define PKT_WRITE_CONGESTION_ON 10000
-#define PKT_WRITE_CONGESTION_OFF 9000
-
-
-struct packet_settings
-{
- __u32 size; /* packet size in (512 byte) sectors */
- __u8 fp; /* fixed packets */
- __u8 link_loss; /* the rest is specified
- * as per Mt Fuji */
- __u8 write_type;
- __u8 track_mode;
- __u8 block_mode;
-};
-
-/*
- * Very crude stats for now
- */
-struct packet_stats
-{
- unsigned long pkt_started;
- unsigned long pkt_ended;
- unsigned long secs_w;
- unsigned long secs_rg;
- unsigned long secs_r;
-};
-
-struct packet_cdrw
-{
- struct list_head pkt_free_list;
- struct list_head pkt_active_list;
- spinlock_t active_list_lock; /* Serialize access to pkt_active_list */
- struct task_struct *thread;
- atomic_t pending_bios;
-};
-
-/*
- * Switch to high speed reading after reading this many kilobytes
- * with no interspersed writes.
- */
-#define HI_SPEED_SWITCH 512
-
-struct packet_iosched
-{
- atomic_t attention; /* Set to non-zero when queue processing is needed */
- int writing; /* Non-zero when writing, zero when reading */
- spinlock_t lock; /* Protecting read/write queue manipulations */
- struct bio_list read_queue;
- struct bio_list write_queue;
- sector_t last_write; /* The sector where the last write ended */
- int successive_reads;
-};
-
-/*
- * 32 buffers of 2048 bytes
- */
-#if (PAGE_SIZE % CD_FRAMESIZE) != 0
-#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE"
-#endif
-#define PACKET_MAX_SIZE 128
-#define FRAMES_PER_PAGE (PAGE_SIZE / CD_FRAMESIZE)
-#define PACKET_MAX_SECTORS (PACKET_MAX_SIZE * CD_FRAMESIZE >> 9)
-
-enum packet_data_state {
- PACKET_IDLE_STATE, /* Not used at the moment */
- PACKET_WAITING_STATE, /* Waiting for more bios to arrive, so */
- /* we don't have to do as much */
- /* data gathering */
- PACKET_READ_WAIT_STATE, /* Waiting for reads to fill in holes */
- PACKET_WRITE_WAIT_STATE, /* Waiting for the write to complete */
- PACKET_RECOVERY_STATE, /* Recover after read/write errors */
- PACKET_FINISHED_STATE, /* After write has finished */
-
- PACKET_NUM_STATES /* Number of possible states */
-};
-
-/*
- * Information needed for writing a single packet
- */
-struct pktcdvd_device;
-
-struct packet_data
-{
- struct list_head list;
-
- spinlock_t lock; /* Lock protecting state transitions and */
- /* orig_bios list */
-
- struct bio_list orig_bios; /* Original bios passed to pkt_make_request */
- /* that will be handled by this packet */
- int write_size; /* Total size of all bios in the orig_bios */
- /* list, measured in number of frames */
-
- struct bio *w_bio; /* The bio we will send to the real CD */
- /* device once we have all data for the */
- /* packet we are going to write */
- sector_t sector; /* First sector in this packet */
- int frames; /* Number of frames in this packet */
-
- enum packet_data_state state; /* Current state */
- atomic_t run_sm; /* Incremented whenever the state */
- /* machine needs to be run */
- long sleep_time; /* Set this to non-zero to make the state */
- /* machine run after this many jiffies. */
-
- atomic_t io_wait; /* Number of pending IO operations */
- atomic_t io_errors; /* Number of read/write errors during IO */
-
- struct bio *r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */
- struct page *pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE];
-
- int cache_valid; /* If non-zero, the data for the zone defined */
- /* by the sector variable is completely cached */
- /* in the pages[] vector. */
-
- int id; /* ID number for debugging */
- struct pktcdvd_device *pd;
-};
-
-struct pkt_rb_node {
- struct rb_node rb_node;
- struct bio *bio;
-};
-
-struct packet_stacked_data
-{
- struct bio *bio; /* Original read request bio */
- struct pktcdvd_device *pd;
-};
-#define PSD_POOL_SIZE 64
-
-struct pktcdvd_device
-{
- struct block_device *bdev; /* dev attached */
- dev_t pkt_dev; /* our dev */
- char name[20];
- struct packet_settings settings;
- struct packet_stats stats;
- int refcnt; /* Open count */
- int write_speed; /* current write speed, kB/s */
- int read_speed; /* current read speed, kB/s */
- unsigned long offset; /* start offset */
- __u8 mode_offset; /* 0 / 8 */
- __u8 type;
- unsigned long flags;
- __u16 mmc3_profile;
- __u32 nwa; /* next writable address */
- __u32 lra; /* last recorded address */
- struct packet_cdrw cdrw;
- wait_queue_head_t wqueue;
-
- spinlock_t lock; /* Serialize access to bio_queue */
- struct rb_root bio_queue; /* Work queue of bios we need to handle */
- int bio_queue_size; /* Number of nodes in bio_queue */
- bool congested; /* Someone is waiting for bio_queue_size
- * to drop. */
- sector_t current_sector; /* Keep track of where the elevator is */
- atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */
- /* needs to be run. */
- mempool_t rb_pool; /* mempool for pkt_rb_node allocations */
-
- struct packet_iosched iosched;
- struct gendisk *disk;
-
- int write_congestion_off;
- int write_congestion_on;
-
- struct device *dev; /* sysfs pktcdvd[0-7] dev */
-
- struct dentry *dfs_d_root; /* debugfs: devname directory */
- struct dentry *dfs_f_info; /* debugfs: info file */
-};
-
-#endif /* __PKTCDVD_H */
diff --git a/include/linux/platform_data/adp5588.h b/include/linux/platform_data/adp5588.h
deleted file mode 100644
index 6d3f7d911a92..000000000000
--- a/include/linux/platform_data/adp5588.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller
- *
- * Copyright 2009-2010 Analog Devices Inc.
- */
-
-#ifndef _ADP5588_H
-#define _ADP5588_H
-
-#define DEV_ID 0x00 /* Device ID */
-#define CFG 0x01 /* Configuration Register1 */
-#define INT_STAT 0x02 /* Interrupt Status Register */
-#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */
-#define Key_EVENTA 0x04 /* Key Event Register A */
-#define Key_EVENTB 0x05 /* Key Event Register B */
-#define Key_EVENTC 0x06 /* Key Event Register C */
-#define Key_EVENTD 0x07 /* Key Event Register D */
-#define Key_EVENTE 0x08 /* Key Event Register E */
-#define Key_EVENTF 0x09 /* Key Event Register F */
-#define Key_EVENTG 0x0A /* Key Event Register G */
-#define Key_EVENTH 0x0B /* Key Event Register H */
-#define Key_EVENTI 0x0C /* Key Event Register I */
-#define Key_EVENTJ 0x0D /* Key Event Register J */
-#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */
-#define UNLOCK1 0x0F /* Unlock Key1 */
-#define UNLOCK2 0x10 /* Unlock Key2 */
-#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */
-#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */
-#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */
-#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */
-#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */
-#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */
-#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */
-#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */
-#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */
-#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */
-#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */
-#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */
-#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */
-#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */
-#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */
-#define GPI_EM1 0x20 /* GPI Event Mode 1 */
-#define GPI_EM2 0x21 /* GPI Event Mode 2 */
-#define GPI_EM3 0x22 /* GPI Event Mode 3 */
-#define GPIO_DIR1 0x23 /* GPIO Data Direction */
-#define GPIO_DIR2 0x24 /* GPIO Data Direction */
-#define GPIO_DIR3 0x25 /* GPIO Data Direction */
-#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */
-#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */
-#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */
-#define Debounce_DIS1 0x29 /* Debounce Disable */
-#define Debounce_DIS2 0x2A /* Debounce Disable */
-#define Debounce_DIS3 0x2B /* Debounce Disable */
-#define GPIO_PULL1 0x2C /* GPIO Pull Disable */
-#define GPIO_PULL2 0x2D /* GPIO Pull Disable */
-#define GPIO_PULL3 0x2E /* GPIO Pull Disable */
-#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */
-#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */
-#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */
-#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */
-#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */
-#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */
-#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */
-#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */
-#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */
-#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */
-#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */
-#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */
-#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */
-#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */
-#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */
-
-#define ADP5588_DEVICE_ID_MASK 0xF
-
- /* Configuration Register1 */
-#define ADP5588_AUTO_INC (1 << 7)
-#define ADP5588_GPIEM_CFG (1 << 6)
-#define ADP5588_OVR_FLOW_M (1 << 5)
-#define ADP5588_INT_CFG (1 << 4)
-#define ADP5588_OVR_FLOW_IEN (1 << 3)
-#define ADP5588_K_LCK_IM (1 << 2)
-#define ADP5588_GPI_IEN (1 << 1)
-#define ADP5588_KE_IEN (1 << 0)
-
-/* Interrupt Status Register */
-#define ADP5588_CMP2_INT (1 << 5)
-#define ADP5588_CMP1_INT (1 << 4)
-#define ADP5588_OVR_FLOW_INT (1 << 3)
-#define ADP5588_K_LCK_INT (1 << 2)
-#define ADP5588_GPI_INT (1 << 1)
-#define ADP5588_KE_INT (1 << 0)
-
-/* Key Lock and Event Counter Register */
-#define ADP5588_K_LCK_EN (1 << 6)
-#define ADP5588_LCK21 0x30
-#define ADP5588_KEC 0xF
-
-#define ADP5588_MAXGPIO 18
-#define ADP5588_BANK(offs) ((offs) >> 3)
-#define ADP5588_BIT(offs) (1u << ((offs) & 0x7))
-
-/* Put one of these structures in i2c_board_info platform_data */
-
-#define ADP5588_KEYMAPSIZE 80
-
-#define GPI_PIN_ROW0 97
-#define GPI_PIN_ROW1 98
-#define GPI_PIN_ROW2 99
-#define GPI_PIN_ROW3 100
-#define GPI_PIN_ROW4 101
-#define GPI_PIN_ROW5 102
-#define GPI_PIN_ROW6 103
-#define GPI_PIN_ROW7 104
-#define GPI_PIN_COL0 105
-#define GPI_PIN_COL1 106
-#define GPI_PIN_COL2 107
-#define GPI_PIN_COL3 108
-#define GPI_PIN_COL4 109
-#define GPI_PIN_COL5 110
-#define GPI_PIN_COL6 111
-#define GPI_PIN_COL7 112
-#define GPI_PIN_COL8 113
-#define GPI_PIN_COL9 114
-
-#define GPI_PIN_ROW_BASE GPI_PIN_ROW0
-#define GPI_PIN_ROW_END GPI_PIN_ROW7
-#define GPI_PIN_COL_BASE GPI_PIN_COL0
-#define GPI_PIN_COL_END GPI_PIN_COL9
-
-#define GPI_PIN_BASE GPI_PIN_ROW_BASE
-#define GPI_PIN_END GPI_PIN_COL_END
-
-#define ADP5588_GPIMAPSIZE_MAX (GPI_PIN_END - GPI_PIN_BASE + 1)
-
-struct adp5588_gpi_map {
- unsigned short pin;
- unsigned short sw_evt;
-};
-
-struct adp5588_kpad_platform_data {
- int rows; /* Number of rows */
- int cols; /* Number of columns */
- const unsigned short *keymap; /* Pointer to keymap */
- unsigned short keymapsize; /* Keymap size */
- unsigned repeat:1; /* Enable key repeat */
- unsigned en_keylock:1; /* Enable Key Lock feature */
- unsigned short unlock_key1; /* Unlock Key 1 */
- unsigned short unlock_key2; /* Unlock Key 2 */
- const struct adp5588_gpi_map *gpimap;
- unsigned short gpimapsize;
- const struct adp5588_gpio_platform_data *gpio_data;
-};
-
-struct i2c_client; /* forward declaration */
-
-struct adp5588_gpio_platform_data {
- int gpio_start; /* GPIO Chip base # */
- const char *const *names;
- unsigned irq_base; /* interrupt base # */
- unsigned pullup_dis_mask; /* Pull-Up Disable Mask */
- int (*setup)(struct i2c_client *client,
- unsigned gpio, unsigned ngpio,
- void *context);
- int (*teardown)(struct i2c_client *client,
- unsigned gpio, unsigned ngpio,
- void *context);
- void *context;
-};
-
-#endif
diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index 8b1b795867a1..5744a2d746aa 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -5724,8 +5724,21 @@ enum typec_control_command {
TYPEC_CONTROL_COMMAND_EXIT_MODES,
TYPEC_CONTROL_COMMAND_CLEAR_EVENTS,
TYPEC_CONTROL_COMMAND_ENTER_MODE,
+ TYPEC_CONTROL_COMMAND_TBT_UFP_REPLY,
+ TYPEC_CONTROL_COMMAND_USB_MUX_SET,
};
+/* Replies the AP may specify to the TBT EnterMode command as a UFP */
+enum typec_tbt_ufp_reply {
+ TYPEC_TBT_UFP_REPLY_NAK,
+ TYPEC_TBT_UFP_REPLY_ACK,
+};
+
+struct typec_usb_mux_set {
+ uint8_t mux_index; /* Index of the mux to set in the chain */
+ uint8_t mux_flags; /* USB_PD_MUX_*-encoded USB mux state to set */
+} __ec_align1;
+
struct ec_params_typec_control {
uint8_t port;
uint8_t command; /* enum typec_control_command */
@@ -5739,6 +5752,8 @@ struct ec_params_typec_control {
union {
uint32_t clear_events_mask;
uint8_t mode_to_enter; /* enum typec_mode */
+ uint8_t tbt_ufp_reply; /* enum typec_tbt_ufp_reply */
+ struct typec_usb_mux_set mux_params;
uint8_t placeholder[128];
};
} __ec_align1;
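
A speculative sketch of how the AP side might populate the new mux-set request; only the structure layout and TYPEC_CONTROL_COMMAND_USB_MUX_SET come from this header, the helper itself is invented:

#include <linux/string.h>

static void foo_fill_mux_set(struct ec_params_typec_control *p,
			     uint8_t port, uint8_t index, uint8_t flags)
{
	memset(p, 0, sizeof(*p));
	p->port = port;
	p->command = TYPEC_CONTROL_COMMAND_USB_MUX_SET;
	p->mux_params.mux_index = index;	/* which mux in the chain */
	p->mux_params.mux_flags = flags;	/* USB_PD_MUX_* encoded state */
}
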
@@ -5817,6 +5832,9 @@ enum tcpc_cc_polarity {
#define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0)
#define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1)
#define PD_STATUS_EVENT_HARD_RESET BIT(2)
+#define PD_STATUS_EVENT_DISCONNECTED BIT(3)
+#define PD_STATUS_EVENT_MUX_0_SET_DONE BIT(4)
+#define PD_STATUS_EVENT_MUX_1_SET_DONE BIT(5)
struct ec_params_typec_status {
uint8_t port;
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 408b29ca4004..e43107e0bee1 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -169,6 +169,7 @@ struct cros_ec_device {
int event_size;
u32 host_event_wake_mask;
u32 last_resume_result;
+ u16 suspend_timeout_ms;
ktime_t last_event_time;
struct notifier_block notifier_ready;
diff --git a/include/linux/platform_data/dma-hsu.h b/include/linux/platform_data/dma-hsu.h
index c65b412b2b33..611bae193c1c 100644
--- a/include/linux/platform_data/dma-hsu.h
+++ b/include/linux/platform_data/dma-hsu.h
@@ -8,7 +8,7 @@
#ifndef _PLATFORM_DATA_DMA_HSU_H
#define _PLATFORM_DATA_DMA_HSU_H
-#include <linux/device.h>
+struct device;
struct hsu_dma_slave {
struct device *dma_dev;
diff --git a/include/linux/platform_data/emc2305.h b/include/linux/platform_data/emc2305.h
new file mode 100644
index 000000000000..54d672dd6f7d
--- /dev/null
+++ b/include/linux/platform_data/emc2305.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_PLATFORM_DATA_EMC2305__
+#define __LINUX_PLATFORM_DATA_EMC2305__
+
+#define EMC2305_PWM_MAX 5
+
+/**
+ * struct emc2305_platform_data - EMC2305 driver platform data
+ * @max_state: maximum cooling state of the cooling device;
+ * @pwm_num: number of active channels;
+ * @pwm_separate: separate PWM settings for every channel;
+ * @pwm_min: array of minimum PWM per channel;
+ */
+struct emc2305_platform_data {
+ u8 max_state;
+ u8 pwm_num;
+ bool pwm_separate;
+ u8 pwm_min[EMC2305_PWM_MAX];
+};
+
+#endif
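
A made-up board-file instance, just to show how the fields are meant to be filled in:

static struct emc2305_platform_data board_emc2305_pdata = {
	.max_state = 10,		/* cooling states 0..10 */
	.pwm_num = 3,			/* three fans wired up */
	.pwm_separate = true,		/* per-channel PWM settings */
	.pwm_min = { 30, 30, 30 },
};
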
diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h
index c9cc4e32435d..dcca6c5e23bb 100644
--- a/include/linux/platform_data/gpmc-omap.h
+++ b/include/linux/platform_data/gpmc-omap.h
@@ -136,6 +136,13 @@ struct gpmc_device_timings {
#define GPMC_MUX_AAD 1 /* Addr-Addr-Data multiplex */
#define GPMC_MUX_AD 2 /* Addr-Data multiplex */
+/* Wait pin polarity values */
+#define GPMC_WAITPINPOLARITY_INVALID UINT_MAX
+#define GPMC_WAITPINPOLARITY_ACTIVE_LOW 0
+#define GPMC_WAITPINPOLARITY_ACTIVE_HIGH 1
+
+#define GPMC_WAITPIN_INVALID UINT_MAX
+
struct gpmc_settings {
bool burst_wrap; /* enables wrap bursting */
bool burst_read; /* enables read page/burst mode */
@@ -149,6 +156,7 @@ struct gpmc_settings {
u32 device_width; /* device bus width (8 or 16 bit) */
u32 mux_add_data; /* multiplex address & data */
u32 wait_pin; /* wait-pin to be used */
+ u32 wait_pin_polarity;
};
/* Data for each chip select */
diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h
index 281f499eda97..f2781aa7eff8 100644
--- a/include/linux/platform_data/gsc_hwmon.h
+++ b/include/linux/platform_data/gsc_hwmon.h
@@ -29,18 +29,17 @@ struct gsc_hwmon_channel {
/**
* struct gsc_hwmon_platform_data - platform data for gsc_hwmon driver
- * @channels: pointer to array of gsc_hwmon_channel structures
- * describing channels
* @nchannels: number of elements in @channels array
* @vreference: voltage reference (mV)
* @resolution: ADC bit resolution
* @fan_base: register base for FAN controller
+ * @channels: array of gsc_hwmon_channel structures describing channels
*/
struct gsc_hwmon_platform_data {
- const struct gsc_hwmon_channel *channels;
int nchannels;
unsigned int resolution;
unsigned int vreference;
unsigned int fan_base;
+ struct gsc_hwmon_channel channels[];
};
#endif
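
With @channels now a trailing flexible array, callers would typically size the allocation with struct_size() from <linux/overflow.h>; a sketch inside a probe function, with an invented channel count:

struct gsc_hwmon_platform_data *pdata;
unsigned int nchannels = 4;

pdata = devm_kzalloc(dev, struct_size(pdata, channels, nchannels), GFP_KERNEL);
if (!pdata)
	return -ENOMEM;
pdata->nchannels = nchannels;
/* then fill pdata->channels[0..nchannels-1] in place */
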
diff --git a/include/linux/platform_data/pca953x.h b/include/linux/platform_data/pca953x.h
index 4eb53e023997..96c1a14ab365 100644
--- a/include/linux/platform_data/pca953x.h
+++ b/include/linux/platform_data/pca953x.h
@@ -22,7 +22,7 @@ struct pca953x_platform_data {
int (*setup)(struct i2c_client *client,
unsigned gpio, unsigned ngpio,
void *context);
- int (*teardown)(struct i2c_client *client,
+ void (*teardown)(struct i2c_client *client,
unsigned gpio, unsigned ngpio,
void *context);
const char *const *names;
diff --git a/include/linux/platform_data/ssm2518.h b/include/linux/platform_data/ssm2518.h
deleted file mode 100644
index 3f9e632d6f63..000000000000
--- a/include/linux/platform_data/ssm2518.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * SSM2518 amplifier audio driver
- *
- * Copyright 2013 Analog Devices Inc.
- * Author: Lars-Peter Clausen <lars@metafoo.de>
- */
-
-#ifndef __LINUX_PLATFORM_DATA_SSM2518_H__
-#define __LINUX_PLATFORM_DATA_SSM2518_H__
-
-/**
- * struct ssm2518_platform_data - Platform data for the ssm2518 driver
- * @enable_gpio: GPIO connected to the nSD pin. Set to -1 if the nSD pin is
- * hardwired.
- */
-struct ssm2518_platform_data {
- int enable_gpio;
-};
-
-#endif
diff --git a/include/linux/platform_data/st33zp24.h b/include/linux/platform_data/st33zp24.h
deleted file mode 100644
index 61db674f36cc..000000000000
--- a/include/linux/platform_data/st33zp24.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * STMicroelectronics TPM Linux driver for TPM 1.2 ST33ZP24
- * Copyright (C) 2009 - 2016 STMicroelectronics
- */
-#ifndef __ST33ZP24_H__
-#define __ST33ZP24_H__
-
-#define TPM_ST33_I2C "st33zp24-i2c"
-#define TPM_ST33_SPI "st33zp24-spi"
-
-struct st33zp24_platform_data {
- int io_lpcpd;
-};
-
-#endif /* __ST33ZP24_H__ */
diff --git a/include/linux/platform_data/tps68470.h b/include/linux/platform_data/tps68470.h
index 126d082c3f2e..e605a2cab07f 100644
--- a/include/linux/platform_data/tps68470.h
+++ b/include/linux/platform_data/tps68470.h
@@ -27,9 +27,14 @@ struct tps68470_regulator_platform_data {
const struct regulator_init_data *reg_init_data[TPS68470_NUM_REGULATORS];
};
-struct tps68470_clk_platform_data {
+struct tps68470_clk_consumer {
const char *consumer_dev_name;
const char *consumer_con_id;
};
+struct tps68470_clk_platform_data {
+ unsigned int n_consumers;
+ struct tps68470_clk_consumer consumers[];
+};
+
#endif
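
A sketch of a statically defined instance with the new flexible consumers[] array (this relies on the GCC static-initialization extension the kernel already uses for such tables; the consumer device names are invented):

static const struct tps68470_clk_platform_data foo_tps68470_clk_pdata = {
	.n_consumers = 2,
	.consumers = {
		{ .consumer_dev_name = "i2c-SENSOR0:00", .consumer_con_id = "xvclk" },
		{ .consumer_dev_name = "i2c-SENSOR1:00", .consumer_con_id = "xvclk" },
	},
};
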
diff --git a/include/linux/platform_data/usb-s3c2410_udc.h b/include/linux/platform_data/usb-s3c2410_udc.h
index 07394819d03b..c0fbe1fe3426 100644
--- a/include/linux/platform_data/usb-s3c2410_udc.h
+++ b/include/linux/platform_data/usb-s3c2410_udc.h
@@ -22,12 +22,6 @@ enum s3c2410_udc_cmd_e {
struct s3c2410_udc_mach_info {
void (*udc_command)(enum s3c2410_udc_cmd_e);
void (*vbus_draw)(unsigned int ma);
-
- unsigned int pullup_pin;
- unsigned int pullup_pin_inverted;
-
- unsigned int vbus_pin;
- unsigned char vbus_pin_inverted;
};
extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *);
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 98f2b2f20f3e..28234dc9fa6a 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -65,6 +65,7 @@
#define ASUS_WMI_DEVID_PANEL_OD 0x00050019
#define ASUS_WMI_DEVID_CAMERA 0x00060013
#define ASUS_WMI_DEVID_LID_FLIP 0x00060062
+#define ASUS_WMI_DEVID_LID_FLIP_ROG 0x00060077
/* Storage */
#define ASUS_WMI_DEVID_CARDREADER 0x00080013
@@ -78,6 +79,7 @@
#define ASUS_WMI_DEVID_THERMAL_CTRL 0x00110011
#define ASUS_WMI_DEVID_FAN_CTRL 0x00110012 /* deprecated */
#define ASUS_WMI_DEVID_CPU_FAN_CTRL 0x00110013
+#define ASUS_WMI_DEVID_GPU_FAN_CTRL 0x00110014
#define ASUS_WMI_DEVID_CPU_FAN_CURVE 0x00110024
#define ASUS_WMI_DEVID_GPU_FAN_CURVE 0x00110025
@@ -99,6 +101,15 @@
/* dgpu on/off */
#define ASUS_WMI_DEVID_DGPU 0x00090020
+/* gpu mux switch, 0 = dGPU, 1 = Optimus */
+#define ASUS_WMI_DEVID_GPU_MUX 0x00090016
+
+/* TUF laptop RGB modes/colours */
+#define ASUS_WMI_DEVID_TUF_RGB_MODE 0x00100056
+
+/* TUF laptop RGB power/state */
+#define ASUS_WMI_DEVID_TUF_RGB_STATE 0x00100057
+
/* DSTS masks */
#define ASUS_WMI_DSTS_STATUS_BIT 0x00000001
#define ASUS_WMI_DSTS_UNKNOWN_BIT 0x00000002
diff --git a/include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h b/include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h
new file mode 100644
index 000000000000..23d60130272c
--- /dev/null
+++ b/include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ */
+
+#ifndef __PLATFORM_DATA_X86_NVIDIA_WMI_EC_BACKLIGHT_H
+#define __PLATFORM_DATA_X86_NVIDIA_WMI_EC_BACKLIGHT_H
+
+#define WMI_BRIGHTNESS_GUID "603E9613-EF25-4338-A3D0-C46177516DB7"
+
+/**
+ * enum wmi_brightness_method - WMI method IDs
+ * @WMI_BRIGHTNESS_METHOD_LEVEL: Get/Set EC brightness level status
+ * @WMI_BRIGHTNESS_METHOD_SOURCE: Get/Set EC Brightness Source
+ */
+enum wmi_brightness_method {
+ WMI_BRIGHTNESS_METHOD_LEVEL = 1,
+ WMI_BRIGHTNESS_METHOD_SOURCE = 2,
+ WMI_BRIGHTNESS_METHOD_MAX
+};
+
+/**
+ * enum wmi_brightness_mode - Operation mode for WMI-wrapped method
+ * @WMI_BRIGHTNESS_MODE_GET: Get the current brightness level/source.
+ * @WMI_BRIGHTNESS_MODE_SET: Set the brightness level.
+ * @WMI_BRIGHTNESS_MODE_GET_MAX_LEVEL: Get the maximum brightness level. This
+ * is only valid when the WMI method is
+ * %WMI_BRIGHTNESS_METHOD_LEVEL.
+ */
+enum wmi_brightness_mode {
+ WMI_BRIGHTNESS_MODE_GET = 0,
+ WMI_BRIGHTNESS_MODE_SET = 1,
+ WMI_BRIGHTNESS_MODE_GET_MAX_LEVEL = 2,
+ WMI_BRIGHTNESS_MODE_MAX
+};
+
+/**
+ * enum wmi_brightness_source - Backlight brightness control source selection
+ * @WMI_BRIGHTNESS_SOURCE_GPU: Backlight brightness is controlled by the GPU.
+ * @WMI_BRIGHTNESS_SOURCE_EC: Backlight brightness is controlled by the
+ * system's Embedded Controller (EC).
+ * @WMI_BRIGHTNESS_SOURCE_AUX: Backlight brightness is controlled over the
+ * DisplayPort AUX channel.
+ */
+enum wmi_brightness_source {
+ WMI_BRIGHTNESS_SOURCE_GPU = 1,
+ WMI_BRIGHTNESS_SOURCE_EC = 2,
+ WMI_BRIGHTNESS_SOURCE_AUX = 3,
+ WMI_BRIGHTNESS_SOURCE_MAX
+};
+
+/**
+ * struct wmi_brightness_args - arguments for the WMI-wrapped ACPI method
+ * @mode: Pass in an &enum wmi_brightness_mode value to select between
+ * getting or setting a value.
+ * @val: In parameter for value to set when using %WMI_BRIGHTNESS_MODE_SET
+ * mode. Not used in conjunction with %WMI_BRIGHTNESS_MODE_GET or
+ * %WMI_BRIGHTNESS_MODE_GET_MAX_LEVEL mode.
+ * @ret: Out parameter returning retrieved value when operating in
+ * %WMI_BRIGHTNESS_MODE_GET or %WMI_BRIGHTNESS_MODE_GET_MAX_LEVEL
+ * mode. Not used in %WMI_BRIGHTNESS_MODE_SET mode.
+ * @ignored: Padding; not used. The ACPI method expects a 24 byte params struct.
+ *
+ * This is the parameters structure for the WmiBrightnessNotify ACPI method as
+ * wrapped by WMI. The value passed in to @val or returned by @ret will be a
+ * brightness value when the WMI method ID is %WMI_BRIGHTNESS_METHOD_LEVEL, or
+ * an &enum wmi_brightness_source value with %WMI_BRIGHTNESS_METHOD_SOURCE.
+ */
+struct wmi_brightness_args {
+ u32 mode;
+ u32 val;
+ u32 ret;
+ u32 ignored[3];
+};
+
+#endif
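
A sketch of the argument block for a brightness-level query; the surrounding wmi_evaluate_method() plumbing is omitted, and only the structure and enums come from this header:

struct wmi_brightness_args args = {
	.mode = WMI_BRIGHTNESS_MODE_GET,
	.val  = 0,
	.ret  = 0,
};

/* Pass &args (24 bytes) to method ID WMI_BRIGHTNESS_METHOD_LEVEL; on
 * success the current level comes back in args.ret. */
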
diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h
index 3edfb6d4e67a..b8a701c77fd0 100644
--- a/include/linux/platform_data/x86/pmc_atom.h
+++ b/include/linux/platform_data/x86/pmc_atom.h
@@ -1,12 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Intel Atom SOC Power Management Controller Header File
- * Copyright (c) 2014, Intel Corporation.
+ * Intel Atom SoC Power Management Controller Header File
+ * Copyright (c) 2014-2015,2022 Intel Corporation.
*/
#ifndef PMC_ATOM_H
#define PMC_ATOM_H
+#include <linux/bits.h>
+
/* ValleyView Power Control Unit PCI Device ID */
#define PCI_DEVICE_ID_VLV_PMC 0x0F1C
/* CherryTrail Power Control Unit PCI Device ID */
@@ -139,9 +141,9 @@
#define ACPI_MMIO_REG_LEN 0x100
#define PM1_CNT 0x4
-#define SLEEP_TYPE_MASK 0xFFFFECFF
+#define SLEEP_TYPE_MASK GENMASK(12, 10)
#define SLEEP_TYPE_S5 0x1C00
-#define SLEEP_ENABLE 0x2000
+#define SLEEP_ENABLE BIT(13)
extern int pmc_atom_read(int offset, u32 *value);
diff --git a/include/linux/platform_data/x86/pwm-lpss.h b/include/linux/platform_data/x86/pwm-lpss.h
new file mode 100644
index 000000000000..c852fe24fe2a
--- /dev/null
+++ b/include/linux/platform_data/x86/pwm-lpss.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Intel Low Power Subsystem PWM controller driver */
+
+#ifndef __PLATFORM_DATA_X86_PWM_LPSS_H
+#define __PLATFORM_DATA_X86_PWM_LPSS_H
+
+#include <linux/types.h>
+
+struct device;
+
+struct pwm_lpss_chip;
+
+struct pwm_lpss_boardinfo {
+ unsigned long clk_rate;
+ unsigned int npwm;
+ unsigned long base_unit_bits;
+ /*
+	 * Some versions of the IP may get stuck in the state machine if the
+	 * enable bit is not set, and hence the update bit will report busy
+	 * status until reset. Other versions behave differently.
+ */
+ bool bypass;
+ /*
+ * On some devices the _PS0/_PS3 AML code of the GPU (GFX0) device
+	 * messes with the PWM0 controller's state.
+ */
+ bool other_devices_aml_touches_pwm_regs;
+};
+
+struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base,
+ const struct pwm_lpss_boardinfo *info);
+
+#endif /* __PLATFORM_DATA_X86_PWM_LPSS_H */
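
A sketch of how a bus-glue driver is expected to hand its parameters to the shared helper; the clock rate and counter width are illustrative values, and 'dev'/'base' are assumed to come from the caller's own probe path:

static const struct pwm_lpss_boardinfo foo_lpss_info = {
	.clk_rate = 19200000,
	.npwm = 1,
	.base_unit_bits = 22,
};

/* inside the glue driver's probe, with 'base' already ioremapped */
struct pwm_lpss_chip *lpwm = devm_pwm_lpss_probe(dev, base, &foo_lpss_info);

if (IS_ERR(lpwm))
	return PTR_ERR(lpwm);
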
diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h
index 39fefd48cf4d..57d6a10dfc9e 100644
--- a/include/linux/platform_data/x86/simatic-ipc-base.h
+++ b/include/linux/platform_data/x86/simatic-ipc-base.h
@@ -19,6 +19,7 @@
#define SIMATIC_IPC_DEVICE_427E 2
#define SIMATIC_IPC_DEVICE_127E 3
#define SIMATIC_IPC_DEVICE_227E 4
+#define SIMATIC_IPC_DEVICE_227G 5
struct simatic_ipc_platform {
u8 devmode;
diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h
index f3b76b39776b..632320ec8f08 100644
--- a/include/linux/platform_data/x86/simatic-ipc.h
+++ b/include/linux/platform_data/x86/simatic-ipc.h
@@ -31,6 +31,8 @@ enum simatic_ipc_station_ids {
SIMATIC_IPC_IPC427E = 0x00000A01,
SIMATIC_IPC_IPC477E = 0x00000A02,
SIMATIC_IPC_IPC127E = 0x00000D01,
+ SIMATIC_IPC_IPC227G = 0x00000F01,
+ SIMATIC_IPC_IPC427G = 0x00001001,
};
static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len)
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 871c9c49ec9d..93cd34f00822 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -375,19 +375,20 @@ const struct dev_pm_ops name = { \
}
#ifdef CONFIG_PM
-#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
- runtime_resume_fn, idle_fn, sec, ns) \
- _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
- runtime_resume_fn, idle_fn); \
- __EXPORT_SYMBOL(name, sec, ns)
+#define _EXPORT_DEV_PM_OPS(name, sec, ns) \
+ const struct dev_pm_ops name; \
+ __EXPORT_SYMBOL(name, sec, ns); \
+ const struct dev_pm_ops name
#else
-#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
- runtime_resume_fn, idle_fn, sec, ns) \
-static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
- resume_fn, runtime_suspend_fn, \
- runtime_resume_fn, idle_fn)
+#define _EXPORT_DEV_PM_OPS(name, sec, ns) \
+ static __maybe_unused const struct dev_pm_ops __static_##name
#endif
+#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "")
+#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "_gpl", "")
+#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns)
+#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "_gpl", #ns)
+
/*
* Use this if you want to use the same suspend and resume callbacks for suspend
* to RAM and hibernation.
@@ -399,13 +400,21 @@ static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
_DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL)
#define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
- _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", "")
+ EXPORT_DEV_PM_OPS(name) = { \
+ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+ }
#define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
- _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", "")
+ EXPORT_GPL_DEV_PM_OPS(name) = { \
+ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+ }
#define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \
- _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", #ns)
+ EXPORT_NS_DEV_PM_OPS(name, ns) = { \
+ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+ }
#define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \
- _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", #ns)
+ EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \
+ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+ }
/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index ebc351698090..1cd41bdf73cf 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -17,6 +17,7 @@
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
+#include <linux/time64.h>
/*
* Flags to control the behaviour of a genpd.
@@ -95,6 +96,7 @@ struct genpd_governor_data {
s64 max_off_time_ns;
bool max_off_time_changed;
ktime_t next_wakeup;
+ ktime_t next_hrtimer;
bool cached_power_down_ok;
bool cached_power_down_state_idx;
};
@@ -232,6 +234,7 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state);
int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb);
int dev_pm_genpd_remove_notifier(struct device *dev);
void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next);
+ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev);
extern struct dev_power_governor simple_qos_governor;
extern struct dev_power_governor pm_domain_always_on_gov;
@@ -293,6 +296,10 @@ static inline int dev_pm_genpd_remove_notifier(struct device *dev)
static inline void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next)
{ }
+static inline ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev)
+{
+ return KTIME_MAX;
+}
#define simple_qos_governor (*(struct dev_power_governor *)(NULL))
#define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL))
#endif
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 0a41b2dcccad..9a8151a2bdea 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -40,17 +40,21 @@
resume_fn, idle_fn)
#define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
- _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
- suspend_fn, resume_fn, idle_fn, "", "")
+ EXPORT_DEV_PM_OPS(name) = { \
+ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
+ }
#define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
- _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
- suspend_fn, resume_fn, idle_fn, "_gpl", "")
+ EXPORT_GPL_DEV_PM_OPS(name) = { \
+ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
+ }
#define EXPORT_NS_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \
- _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
- suspend_fn, resume_fn, idle_fn, "", #ns)
+ EXPORT_NS_DEV_PM_OPS(name, ns) = { \
+ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
+ }
#define EXPORT_NS_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \
- _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
- suspend_fn, resume_fn, idle_fn, "_gpl", #ns)
+ EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \
+ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
+ }
#ifdef CONFIG_PM
extern struct workqueue_struct *pm_wq;
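
With this rework the EXPORT_*_RUNTIME_DEV_PM_OPS() macros expand to an ordinary initialized dev_pm_ops definition; a minimal usage sketch with invented callbacks:

static int foo_runtime_suspend(struct device *dev) { return 0; }
static int foo_runtime_resume(struct device *dev) { return 0; }

EXPORT_RUNTIME_DEV_PM_OPS(foo_pm_ops, foo_runtime_suspend,
			  foo_runtime_resume, NULL);
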
diff --git a/include/linux/poison.h b/include/linux/poison.h
index d62ef5a6b4e9..2d3249eb0e62 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -81,4 +81,7 @@
/********** net/core/page_pool.c **********/
#define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA)
+/********** kernel/bpf/ **********/
+#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA))
+
#endif
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 7d1e604c1325..ee608d22ecb9 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -69,21 +69,21 @@ extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
extern struct posix_acl *get_posix_acl(struct inode *, int);
-extern int set_posix_acl(struct user_namespace *, struct inode *, int,
- struct posix_acl *);
+int set_posix_acl(struct user_namespace *, struct dentry *, int,
+ struct posix_acl *);
struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags);
#ifdef CONFIG_FS_POSIX_ACL
-int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t);
+int posix_acl_chmod(struct user_namespace *, struct dentry *, umode_t);
extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **,
struct posix_acl **);
int posix_acl_update_mode(struct user_namespace *, struct inode *, umode_t *,
struct posix_acl **);
-extern int simple_set_acl(struct user_namespace *, struct inode *,
- struct posix_acl *, int);
+int simple_set_acl(struct user_namespace *, struct dentry *,
+ struct posix_acl *, int);
extern int simple_acl_create(struct inode *, struct inode *);
struct posix_acl *get_cached_acl(struct inode *inode, int type);
@@ -99,9 +99,16 @@ static inline void cache_no_acl(struct inode *inode)
inode->i_acl = NULL;
inode->i_default_acl = NULL;
}
+
+int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+ const char *acl_name, struct posix_acl *kacl);
+struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name);
+int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+ const char *acl_name);
#else
static inline int posix_acl_chmod(struct user_namespace *mnt_userns,
- struct inode *inode, umode_t mode)
+ struct dentry *dentry, umode_t mode)
{
return 0;
}
@@ -126,8 +133,28 @@ static inline int posix_acl_create(struct inode *inode, umode_t *mode,
static inline void forget_all_cached_acls(struct inode *inode)
{
}
+
+static inline int vfs_set_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *name,
+ struct posix_acl *acl)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int vfs_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_FS_POSIX_ACL */
-struct posix_acl *get_acl(struct inode *inode, int type);
+struct posix_acl *get_inode_acl(struct inode *inode, int type);
#endif /* __LINUX_POSIX_ACL_H */
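
A sketch of the new dentry-based helpers in use; error handling is trimmed, and mnt_userns/dentry are assumed to be in scope in the caller:

struct posix_acl *acl;
int error;

acl = vfs_get_acl(mnt_userns, dentry, XATTR_NAME_POSIX_ACL_ACCESS);
if (IS_ERR(acl))
	return PTR_ERR(acl);

/* ... inspect or modify the ACL ... */

error = vfs_set_acl(mnt_userns, dentry, XATTR_NAME_POSIX_ACL_ACCESS, acl);
posix_acl_release(acl);
return error;
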
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index b6bd3eac2bcc..54cd7a14330d 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -33,39 +33,40 @@ posix_acl_xattr_count(size_t size)
}
#ifdef CONFIG_FS_POSIX_ACL
-void posix_acl_fix_xattr_from_user(void *value, size_t size);
-void posix_acl_fix_xattr_to_user(void *value, size_t size);
-void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
- const struct inode *inode,
- void *value, size_t size);
-void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
- const struct inode *inode,
- void *value, size_t size);
+struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns,
+ const void *value, size_t size);
#else
-static inline void posix_acl_fix_xattr_from_user(void *value, size_t size)
-{
-}
-static inline void posix_acl_fix_xattr_to_user(void *value, size_t size)
-{
-}
-static inline void
-posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
- const struct inode *inode, void *value,
- size_t size)
-{
-}
-static inline void
-posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
- const struct inode *inode, void *value,
- size_t size)
+static inline struct posix_acl *
+posix_acl_from_xattr(struct user_namespace *user_ns, const void *value,
+ size_t size)
{
+ return ERR_PTR(-EOPNOTSUPP);
}
#endif
-struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns,
- const void *value, size_t size);
int posix_acl_to_xattr(struct user_namespace *user_ns,
const struct posix_acl *acl, void *buffer, size_t size);
+static inline const char *posix_acl_xattr_name(int type)
+{
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ return XATTR_NAME_POSIX_ACL_ACCESS;
+ case ACL_TYPE_DEFAULT:
+ return XATTR_NAME_POSIX_ACL_DEFAULT;
+ }
+
+ return "";
+}
+
+static inline int posix_acl_type(const char *name)
+{
+ if (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0)
+ return ACL_TYPE_ACCESS;
+ else if (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)
+ return ACL_TYPE_DEFAULT;
+
+ return -1;
+}
extern const struct xattr_handler posix_acl_access_xattr_handler;
extern const struct xattr_handler posix_acl_default_xattr_handler;
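
A sketch of the two new helpers from an xattr handler's point of view; 'name' is assumed to be the incoming attribute name:

const char *acl_name;
int type;

type = posix_acl_type(name);		/* ACL_TYPE_ACCESS, ACL_TYPE_DEFAULT or -1 */
if (type < 0)
	return -EINVAL;

acl_name = posix_acl_xattr_name(type);	/* back to "system.posix_acl_*" */
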
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index cb380c1d9459..aa2c4a7c4826 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -374,9 +374,37 @@ struct power_supply_vbat_ri_table {
* These timers should be chosen to align with the typical discharge curve
* for the battery.
*
- * When the main CC/CV charging is complete the battery can optionally be
- * maintenance charged at the voltages from this table: a table of settings is
- * traversed using a slightly lower current and voltage than what is used for
+ * Ordinary CC/CV charging will stop charging when the charge current goes
+ * below charge_term_current_ua, and then restart it (if the device is still
+ * plugged into the charger) at charge_restart_voltage_uv. This happens in most
+ * consumer products because the power usage while connected to a charger is
+ * not zero, and devices are not manufactured to draw power directly from the
+ * charger: instead they will at all times drain the battery a little, like
+ * the power used in standby mode. This will over time give a charge graph
+ * such as this:
+ *
+ * Energy
+ * ^ ... ... ... ... ... ... ...
+ * | . . . . . . . . . . . . .
+ * | .. . .. . .. . .. . .. . .. . ..
+ * |. .. .. .. .. .. ..
+ * +-------------------------------------------------------------------> t
+ *
+ * Practically this means that the Li-ions are wandering back and forth in the
+ * battery and this causes degeneration of the battery anode and cathode.
+ * To prolong the life of the battery, maintenance charging is applied after
+ * reaching charge_term_current_ua to hold up the charge in the battery while
+ * consuming power, thus lowering the wear on the battery:
+ *
+ * Energy
+ * ^ .......................................
+ * | . ......................
+ * | ..
+ * |.
+ * +-------------------------------------------------------------------> t
+ *
+ * Maintenance charging uses the voltages from this table: a table of settings
+ * is traversed using a slightly lower current and voltage than what is used for
* CC/CV charging. The maintenance charging will for safety reasons not go on
 * indefinitely: we lower the current and voltage with successive maintenance
* settings, then disable charging completely after we reach the last one,
@@ -385,14 +413,22 @@ struct power_supply_vbat_ri_table {
* ordinary CC/CV charging from there.
*
* As an example, a Samsung EB425161LA Lithium-Ion battery is CC/CV charged
- * at 900mA to 4340mV, then maintenance charged at 600mA and 4150mV for
- * 60 hours, then maintenance charged at 600mA and 4100mV for 200 hours.
+ * at 900mA to 4340mV, then maintenance charged at 600mA and 4150mV for up to
+ * 60 hours, then maintenance charged at 600mA and 4100mV for up to 200 hours.
* After this the charge cycle is restarted waiting for
* charge_restart_voltage_uv.
*
* For most mobile electronics this type of maintenance charging is enough for
* the user to disconnect the device and make use of it before both maintenance
- * charging cycles are complete.
+ * charging cycles are complete, if the current and voltage has been chosen
+ * appropriately. These need to be determined from battery discharge curves
+ * and expected standby current.
+ *
+ * If the voltage nevertheless drops to charge_restart_voltage_uv during
+ * maintenance charging, ordinary CC/CV charging is restarted. This can happen
+ * if the device is e.g. actively used during charging, so that more current is
+ * drawn than the expected standby current. Overvoltage protection will also be
+ * applied as usual.
*/
struct power_supply_maintenance_charge_table {
int charge_current_max_ua;
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index deace5fb4e62..c94c02ba065c 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -9,19 +9,10 @@
#define _LINUX_PRANDOM_H
#include <linux/types.h>
+#include <linux/once.h>
#include <linux/percpu.h>
#include <linux/random.h>
-static inline u32 prandom_u32(void)
-{
- return get_random_u32();
-}
-
-static inline void prandom_bytes(void *buf, size_t nbytes)
-{
- return get_random_bytes(buf, nbytes);
-}
-
struct rnd_state {
__u32 s1, s2, s3, s4;
};
@@ -33,21 +24,10 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
#define prandom_init_once(pcpu_state) \
DO_ONCE(prandom_seed_full_state, (pcpu_state))
-/**
- * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro)
- * @ep_ro: right open interval endpoint
- *
- * Returns a pseudo-random number that is in interval [0, ep_ro). Note
- * that the result depends on PRNG being well distributed in [0, ~0U]
- * u32 space. Here we use maximally equidistributed combined Tausworthe
- * generator, that is, prandom_u32(). This is useful when requesting a
- * random index of an array containing ep_ro elements, for example.
- *
- * Returns: pseudo-random number in interval [0, ep_ro)
- */
+/* Deprecated: use get_random_u32_below() instead. */
static inline u32 prandom_u32_max(u32 ep_ro)
{
- return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
+ return get_random_u32_below(ep_ro);
}
/*
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index b4381f255a5c..0df425bf9bd7 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -421,4 +421,46 @@ static inline void migrate_enable(void) { }
#endif /* CONFIG_SMP */
+/**
+ * preempt_disable_nested - Disable preemption inside a normally preempt disabled section
+ *
+ * Use for code which requires preemption protection inside a critical
+ * section which has preemption disabled implicitly on non-PREEMPT_RT
+ * enabled kernels, by e.g.:
+ * - holding a spinlock/rwlock
+ * - soft interrupt context
+ * - regular interrupt handlers
+ *
+ * On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft
+ * interrupt context and regular interrupt handlers are preemptible and
+ * only prevent migration. preempt_disable_nested() ensures that preemption
+ * is disabled for cases which require CPU local serialization even on
+ * PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP.
+ *
+ * The use cases are code sequences which are not serialized by a
+ * particular lock instance, e.g.:
+ * - seqcount write side critical sections where the seqcount is not
+ * associated to a particular lock and therefore the automatic
+ * protection mechanism does not work. This prevents a live lock
+ * against a preempting high priority reader.
+ * - RMW per CPU variable updates like vmstat.
+ */
+/* Macro to avoid header recursion hell vs. lockdep */
+#define preempt_disable_nested() \
+do { \
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) \
+ preempt_disable(); \
+ else \
+ lockdep_assert_preemption_disabled(); \
+} while (0)
+
+/**
+ * preempt_enable_nested - Undo the effect of preempt_disable_nested()
+ */
+static __always_inline void preempt_enable_nested(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+}
+
#endif /* __LINUX_PREEMPT_H */
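
A minimal sketch of the intended use, assuming an invented per-CPU counter updated from a context that is only implicitly preempt-safe on !PREEMPT_RT:

preempt_disable_nested();
__this_cpu_inc(foo_stat_count);	/* RMW of a per-CPU variable */
preempt_enable_nested();
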
diff --git a/include/linux/printk.h b/include/linux/printk.h
index cf7d666ab1f8..8c81806c2e99 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -169,8 +169,6 @@ extern void __printk_safe_exit(void);
#define printk_deferred_enter __printk_safe_enter
#define printk_deferred_exit __printk_safe_exit
-extern bool pr_flush(int timeout_ms, bool reset_on_progress);
-
/*
* Please don't use printk_ratelimit(), because it shares ratelimiting state
* with all other unrelated printk_ratelimit() callsites. Instead use
@@ -221,11 +219,6 @@ static inline void printk_deferred_exit(void)
{
}
-static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
-{
- return true;
-}
-
static inline int printk_ratelimit(void)
{
return 0;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 81d6e4ec2294..0260f5ea98fe 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -208,8 +208,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {}
static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; }
#define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;})
+#define proc_create_net_data_write(name, mode, parent, ops, write, state_size, data) ({NULL;})
#define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
#define proc_create_net_single(name, mode, parent, show, data) ({NULL;})
+#define proc_create_net_single_write(name, mode, parent, show, write, data) ({NULL;})
static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
{
diff --git a/include/linux/property.h b/include/linux/property.h
index a5b429d623f6..67371c963134 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -32,7 +32,7 @@ enum dev_dma_attr {
DEV_DMA_COHERENT,
};
-struct fwnode_handle *dev_fwnode(struct device *dev);
+struct fwnode_handle *dev_fwnode(const struct device *dev);
bool device_property_present(struct device *dev, const char *propname);
int device_property_read_u8_array(struct device *dev, const char *propname,
@@ -50,7 +50,6 @@ int device_property_read_string(struct device *dev, const char *propname,
int device_property_match_string(struct device *dev,
const char *propname, const char *string);
-bool fwnode_device_is_available(const struct fwnode_handle *fwnode);
bool fwnode_property_present(const struct fwnode_handle *fwnode,
const char *propname);
int fwnode_property_read_u8_array(const struct fwnode_handle *fwnode,
@@ -72,6 +71,15 @@ int fwnode_property_read_string(const struct fwnode_handle *fwnode,
const char *propname, const char **val);
int fwnode_property_match_string(const struct fwnode_handle *fwnode,
const char *propname, const char *string);
+
+bool fwnode_device_is_available(const struct fwnode_handle *fwnode);
+
+static inline
+bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char *compat)
+{
+ return fwnode_property_match_string(fwnode, "compatible", compat) >= 0;
+}
+
int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode,
const char *prop, const char *nargs_prop,
unsigned int nargs, unsigned int index,
@@ -387,7 +395,7 @@ bool device_dma_supported(struct device *dev);
enum dev_dma_attr device_get_dma_attr(struct device *dev);
-const void *device_get_match_data(struct device *dev);
+const void *device_get_match_data(const struct device *dev);
int device_get_phy_mode(struct device *dev);
int fwnode_get_phy_mode(struct fwnode_handle *fwnode);
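
A sketch of the new compatible-string helper used while walking child firmware nodes; the compatible value is invented:

struct fwnode_handle *child;

device_for_each_child_node(dev, child) {
	if (fwnode_device_is_compatible(child, "acme,foo-phy"))
		break;	/* caller keeps this reference; fwnode_handle_put() when done */
}
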
diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
new file mode 100644
index 000000000000..fb724c65c77b
--- /dev/null
+++ b/include/linux/pse-pd/pse.h
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+// Copyright (c) 2022 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+ */
+#ifndef _LINUX_PSE_CONTROLLER_H
+#define _LINUX_PSE_CONTROLLER_H
+
+#include <linux/ethtool.h>
+#include <linux/list.h>
+#include <uapi/linux/ethtool.h>
+
+struct phy_device;
+struct pse_controller_dev;
+
+/**
+ * struct pse_control_config - PSE control/channel configuration.
+ *
+ * @admin_cotrol: set PoDL PSE admin control as described in
+ * IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl
+ */
+struct pse_control_config {
+ enum ethtool_podl_pse_admin_state admin_cotrol;
+};
+
+/**
+ * struct pse_control_status - PSE control/channel status.
+ *
+ * @podl_admin_state: operational state of the PoDL PSE
+ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @podl_pw_status: power detection status of the PoDL PSE.
+ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ */
+struct pse_control_status {
+ enum ethtool_podl_pse_admin_state podl_admin_state;
+ enum ethtool_podl_pse_pw_d_status podl_pw_status;
+};
+
+/**
+ * struct pse_controller_ops - PSE controller driver callbacks
+ *
+ * @ethtool_get_status: get PSE control status for ethtool interface
+ * @ethtool_set_config: set PSE control configuration over ethtool interface
+ */
+struct pse_controller_ops {
+ int (*ethtool_get_status)(struct pse_controller_dev *pcdev,
+ unsigned long id, struct netlink_ext_ack *extack,
+ struct pse_control_status *status);
+ int (*ethtool_set_config)(struct pse_controller_dev *pcdev,
+ unsigned long id, struct netlink_ext_ack *extack,
+ const struct pse_control_config *config);
+};
+
+struct module;
+struct device_node;
+struct of_phandle_args;
+struct pse_control;
+
+/**
+ * struct pse_controller_dev - PSE controller entity that might
+ * provide multiple PSE controls
+ * @ops: a pointer to device specific struct pse_controller_ops
+ * @owner: kernel module of the PSE controller driver
+ * @list: internal list of PSE controller devices
+ * @pse_control_head: head of internal list of requested PSE controls
+ * @dev: corresponding driver model device struct
+ * @of_pse_n_cells: number of cells in PSE line specifiers
+ * @of_xlate: translation function to translate from specifier as found in the
+ * device tree to id as given to the PSE control ops
+ * @nr_lines: number of PSE controls in this controller device
+ * @lock: Mutex for serialization access to the PSE controller
+ */
+struct pse_controller_dev {
+ const struct pse_controller_ops *ops;
+ struct module *owner;
+ struct list_head list;
+ struct list_head pse_control_head;
+ struct device *dev;
+ int of_pse_n_cells;
+ int (*of_xlate)(struct pse_controller_dev *pcdev,
+ const struct of_phandle_args *pse_spec);
+ unsigned int nr_lines;
+ struct mutex lock;
+};
+
+#if IS_ENABLED(CONFIG_PSE_CONTROLLER)
+int pse_controller_register(struct pse_controller_dev *pcdev);
+void pse_controller_unregister(struct pse_controller_dev *pcdev);
+struct device;
+int devm_pse_controller_register(struct device *dev,
+ struct pse_controller_dev *pcdev);
+
+struct pse_control *of_pse_control_get(struct device_node *node);
+void pse_control_put(struct pse_control *psec);
+
+int pse_ethtool_get_status(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ struct pse_control_status *status);
+int pse_ethtool_set_config(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ const struct pse_control_config *config);
+
+#else
+
+static inline struct pse_control *of_pse_control_get(struct device_node *node)
+{
+ return ERR_PTR(-ENOENT);
+}
+
+static inline void pse_control_put(struct pse_control *psec)
+{
+}
+
+static inline int pse_ethtool_get_status(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ struct pse_control_status *status)
+{
+ return -ENOTSUPP;
+}
+
+static inline int pse_ethtool_set_config(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ const struct pse_control_config *config)
+{
+ return -ENOTSUPP;
+}
+
+#endif
+
+#endif
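
For orientation, a minimal sketch of how a PSE controller driver could plug into the API added above. The foo_* names, the platform-device wiring and the hard-coded status values are hypothetical; only struct pse_controller_dev, struct pse_controller_ops and devm_pse_controller_register() come from this header, and the ETHTOOL_PODL_PSE_* constants are assumed from uapi/linux/ethtool.h.

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pse-pd/pse.h>

static int foo_pse_ethtool_get_status(struct pse_controller_dev *pcdev,
				      unsigned long id,
				      struct netlink_ext_ack *extack,
				      struct pse_control_status *status)
{
	/* Hypothetical: a real driver derives these from the registers
	 * of PSE line 'id'. */
	status->podl_admin_state = ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED;
	status->podl_pw_status = ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING;
	return 0;
}

static const struct pse_controller_ops foo_pse_ops = {
	.ethtool_get_status = foo_pse_ethtool_get_status,
};

static int foo_pse_probe(struct platform_device *pdev)
{
	struct pse_controller_dev *pcdev;

	pcdev = devm_kzalloc(&pdev->dev, sizeof(*pcdev), GFP_KERNEL);
	if (!pcdev)
		return -ENOMEM;

	pcdev->owner = THIS_MODULE;
	pcdev->ops = &foo_pse_ops;
	pcdev->dev = &pdev->dev;
	pcdev->nr_lines = 1;

	return devm_pse_controller_register(&pdev->dev, pcdev);
}

On the consumer side, lookup then goes through of_pse_control_get()/pse_control_put() as declared above.
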
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 89784763d19e..b029a847def1 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/cgroup-defs.h>
+#include <linux/cgroup.h>
struct seq_file;
struct css_set;
@@ -18,25 +19,27 @@ extern struct psi_group psi_system;
void psi_init(void);
-void psi_task_change(struct task_struct *task, int clear, int set);
-void psi_task_switch(struct task_struct *prev, struct task_struct *next,
- bool sleep);
-
void psi_memstall_enter(unsigned long *flags);
void psi_memstall_leave(unsigned long *flags);
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
struct psi_trigger *psi_trigger_create(struct psi_group *group,
- char *buf, size_t nbytes, enum psi_res res);
+ char *buf, enum psi_res res);
void psi_trigger_destroy(struct psi_trigger *t);
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
poll_table *wait);
#ifdef CONFIG_CGROUPS
+static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
+{
+ return cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
+}
+
int psi_cgroup_alloc(struct cgroup *cgrp);
void psi_cgroup_free(struct cgroup *cgrp);
void cgroup_move_task(struct task_struct *p, struct css_set *to);
+void psi_cgroup_restart(struct psi_group *group);
#endif
#else /* CONFIG_PSI */
@@ -58,6 +61,7 @@ static inline void cgroup_move_task(struct task_struct *p, struct css_set *to)
{
rcu_assign_pointer(p->cgroups, to);
}
+static inline void psi_cgroup_restart(struct psi_group *group) {}
#endif
#endif /* CONFIG_PSI */
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index c7fe7c089718..1e0a0d7ace3a 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -16,13 +16,6 @@ enum psi_task_count {
NR_MEMSTALL,
NR_RUNNING,
/*
- * This can't have values other than 0 or 1 and could be
- * implemented as a bit flag. But for now we still have room
- * in the first cacheline of psi_group_cpu, and this way we
- * don't have to special case any state tracking for it.
- */
- NR_ONCPU,
- /*
* For IO and CPU stalls the presence of running/oncpu tasks
* in the domain means a partial rather than a full stall.
* For memory it's not so simple because of page reclaimers:
@@ -32,22 +25,27 @@ enum psi_task_count {
* threads and memstall ones.
*/
NR_MEMSTALL_RUNNING,
- NR_PSI_TASK_COUNTS = 5,
+ NR_PSI_TASK_COUNTS = 4,
};
/* Task state bitmasks */
#define TSK_IOWAIT (1 << NR_IOWAIT)
#define TSK_MEMSTALL (1 << NR_MEMSTALL)
#define TSK_RUNNING (1 << NR_RUNNING)
-#define TSK_ONCPU (1 << NR_ONCPU)
#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
+/* Only one task can be scheduled, no corresponding task count */
+#define TSK_ONCPU (1 << NR_PSI_TASK_COUNTS)
+
/* Resources that workloads could be stalled on */
enum psi_res {
PSI_IO,
PSI_MEM,
PSI_CPU,
- NR_PSI_RESOURCES = 3,
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ PSI_IRQ,
+#endif
+ NR_PSI_RESOURCES,
};
/*
@@ -63,11 +61,20 @@ enum psi_states {
PSI_MEM_FULL,
PSI_CPU_SOME,
PSI_CPU_FULL,
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ PSI_IRQ_FULL,
+#endif
/* Only per-CPU, to weigh the CPU in the global average: */
PSI_NONIDLE,
- NR_PSI_STATES = 7,
+ NR_PSI_STATES,
};
+/* Use one bit in the state mask to track TSK_ONCPU */
+#define PSI_ONCPU (1 << NR_PSI_STATES)
+
+/* Flag whether to re-arm avgs_work, see details in get_recent_times() */
+#define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1))
+
enum psi_aggregators {
PSI_AVGS = 0,
PSI_POLL,
@@ -147,6 +154,9 @@ struct psi_trigger {
};
struct psi_group {
+ struct psi_group *parent;
+ bool enabled;
+
/* Protects data used by the aggregator */
struct mutex avgs_lock;
@@ -170,6 +180,7 @@ struct psi_group {
struct timer_list poll_timer;
wait_queue_head_t poll_wait;
atomic_t poll_wakeup;
+ atomic_t poll_scheduled;
/* Protects data used by the monitor */
struct mutex trigger_lock;
@@ -188,6 +199,8 @@ struct psi_group {
#else /* CONFIG_PSI */
+#define NR_PSI_RESOURCES 0
+
struct psi_group { };
#endif /* CONFIG_PSI */
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 9f16afec7290..9d65ff94e216 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -8,28 +8,7 @@
#ifndef __LINUX_PSTORE_RAM_H__
#define __LINUX_PSTORE_RAM_H__
-#include <linux/compiler.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
#include <linux/pstore.h>
-#include <linux/types.h>
-
-/*
- * Choose whether access to the RAM zone requires locking or not. If a zone
- * can be written to from different CPUs like with ftrace for example, then
- * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required.
- */
-#define PRZ_FLAG_NO_LOCK BIT(0)
-/*
- * If a PRZ should only have a single-boot lifetime, this marks it as
- * getting wiped after its contents get copied out after boot.
- */
-#define PRZ_FLAG_ZAP_OLD BIT(1)
-
-struct persistent_ram_buffer;
-struct rs_control;
struct persistent_ram_ecc_info {
int block_size;
@@ -39,84 +18,6 @@ struct persistent_ram_ecc_info {
uint16_t *par;
};
-/**
- * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ)
- * used as a pstore backend
- *
- * @paddr: physical address of the mapped RAM area
- * @size: size of mapping
- * @label: unique name of this PRZ
- * @type: frontend type for this PRZ
- * @flags: holds PRZ_FLAGS_* bits
- *
- * @buffer_lock:
- * locks access to @buffer "size" bytes and "start" offset
- * @buffer:
- * pointer to actual RAM area managed by this PRZ
- * @buffer_size:
- * bytes in @buffer->data (not including any trailing ECC bytes)
- *
- * @par_buffer:
- * pointer into @buffer->data containing ECC bytes for @buffer->data
- * @par_header:
- * pointer into @buffer->data containing ECC bytes for @buffer header
- * (i.e. all fields up to @data)
- * @rs_decoder:
- * RSLIB instance for doing ECC calculations
- * @corrected_bytes:
- * ECC corrected bytes accounting since boot
- * @bad_blocks:
- * ECC uncorrectable bytes accounting since boot
- * @ecc_info:
- * ECC configuration details
- *
- * @old_log:
- * saved copy of @buffer->data prior to most recent wipe
- * @old_log_size:
- * bytes contained in @old_log
- *
- */
-struct persistent_ram_zone {
- phys_addr_t paddr;
- size_t size;
- void *vaddr;
- char *label;
- enum pstore_type_id type;
- u32 flags;
-
- raw_spinlock_t buffer_lock;
- struct persistent_ram_buffer *buffer;
- size_t buffer_size;
-
- char *par_buffer;
- char *par_header;
- struct rs_control *rs_decoder;
- int corrected_bytes;
- int bad_blocks;
- struct persistent_ram_ecc_info ecc_info;
-
- char *old_log;
- size_t old_log_size;
-};
-
-struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
- u32 sig, struct persistent_ram_ecc_info *ecc_info,
- unsigned int memtype, u32 flags, char *label);
-void persistent_ram_free(struct persistent_ram_zone *prz);
-void persistent_ram_zap(struct persistent_ram_zone *prz);
-
-int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
- unsigned int count);
-int persistent_ram_write_user(struct persistent_ram_zone *prz,
- const void __user *s, unsigned int count);
-
-void persistent_ram_save_old(struct persistent_ram_zone *prz);
-size_t persistent_ram_old_size(struct persistent_ram_zone *prz);
-void *persistent_ram_old(struct persistent_ram_zone *prz);
-void persistent_ram_free_old(struct persistent_ram_zone *prz);
-ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
- char *str, size_t len);
-
/*
* Ramoops platform data
* @mem_size memory size for ramoops
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 92b44161408e..fdffa6a98d79 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -45,6 +45,8 @@ struct system_device_crosststamp;
/**
* struct ptp_system_timestamp - system time corresponding to a PHC timestamp
+ * @pre_ts: system timestamp before capturing PHC
+ * @post_ts: system timestamp after capturing PHC
*/
struct ptp_system_timestamp {
struct timespec64 pre_ts;
@@ -75,12 +77,6 @@ struct ptp_system_timestamp {
* nominal frequency in parts per million, but with a
* 16 bit binary fractional field.
*
- * @adjfreq: Adjusts the frequency of the hardware clock.
- * This method is deprecated. New drivers should implement
- * the @adjfine method instead.
- * parameter delta: Desired frequency offset from nominal frequency
- * in parts per billion
- *
* @adjphase: Adjusts the phase offset of the hardware clock.
* parameter delta: Desired change in nanoseconds.
*
@@ -172,7 +168,6 @@ struct ptp_clock_info {
int pps;
struct ptp_pin_desc *pin_config;
int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
- int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
int (*adjphase)(struct ptp_clock_info *ptp, s32 phase);
int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts);
@@ -246,6 +241,52 @@ static inline long scaled_ppm_to_ppb(long ppm)
return (long)ppb;
}
+/**
+ * diff_by_scaled_ppm - Calculate difference using scaled ppm
+ * @base: the base increment value to adjust
+ * @scaled_ppm: scaled parts per million to adjust by
+ * @diff: on return, the absolute value of calculated diff
+ *
+ * Calculate the difference to adjust the base increment using scaled parts
+ * per million.
+ *
+ * Use mul_u64_u64_div_u64 to perform the difference calculation to avoid
+ * possible overflow.
+ *
+ * Returns: true if scaled_ppm is negative, false otherwise
+ */
+static inline bool diff_by_scaled_ppm(u64 base, long scaled_ppm, u64 *diff)
+{
+ bool negative = false;
+
+ if (scaled_ppm < 0) {
+ negative = true;
+ scaled_ppm = -scaled_ppm;
+ }
+
+ *diff = mul_u64_u64_div_u64(base, (u64)scaled_ppm, 1000000ULL << 16);
+
+ return negative;
+}
+
+/**
+ * adjust_by_scaled_ppm - Adjust a base increment by scaled parts per million
+ * @base: the base increment value to adjust
+ * @scaled_ppm: scaled parts per million frequency adjustment
+ *
+ * Helper function which calculates a new increment value based on the
+ * requested scaled parts per million adjustment.
+ */
+static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm)
+{
+ u64 diff;
+
+ if (diff_by_scaled_ppm(base, scaled_ppm, &diff))
+ return base - diff;
+
+ return base + diff;
+}
+
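
A hedged sketch of how a driver's .adjfine callback might use adjust_by_scaled_ppm(); struct foo_clock, FOO_BASE_INCVAL and foo_write_incval() are hypothetical, while the helper and the ptp_clock_info callback signature come from this header. For a 2^32 base, scaled_ppm = 65536 (one ppm) yields diff = 2^32 * 65536 / (10^6 * 2^16) = 4294, about one millionth of the base.

#define FOO_BASE_INCVAL (1ULL << 32)	/* hypothetical nominal increment */

static int foo_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
{
	struct foo_clock *clk = container_of(ptp, struct foo_clock, caps);
	u64 incval = adjust_by_scaled_ppm(FOO_BASE_INCVAL, scaled_ppm);

	/* Hypothetical register write of the adjusted increment. */
	foo_write_incval(clk, incval);
	return 0;
}
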
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
/**
@@ -316,6 +357,11 @@ int ptp_find_pin(struct ptp_clock *ptp,
* should most likely call ptp_find_pin() directly from their
* ptp_clock_info::enable() method.
*
+ * @ptp: The clock obtained from ptp_clock_register().
+ * @func: One of the ptp_pin_function enumerated values.
+ * @chan: The particular functional channel to find.
+ * Return: Pin index in the range of zero to ptp_clock_caps.n_pins - 1,
+ * or -1 if the auxiliary function cannot be found.
*/
int ptp_find_pin_unlocked(struct ptp_clock *ptp,
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index c952c5ba8fab..eaaef3ffec22 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -389,15 +389,6 @@ static inline void user_single_step_report(struct pt_regs *regs)
#define current_pt_regs() task_pt_regs(current)
#endif
-/*
- * unlike current_pt_regs(), this one is equal to task_pt_regs(current)
- * on *all* architectures; the only reason to have a per-arch definition
- * is optimisation.
- */
-#ifndef signal_pt_regs
-#define signal_pt_regs() task_pt_regs(current)
-#endif
-
#ifndef current_user_stack_pointer
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 9429930c5566..bba492eea96c 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -403,13 +403,9 @@ struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc,
const struct of_phandle_args *args);
struct pwm_device *pwm_get(struct device *dev, const char *con_id);
-struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
- const char *con_id);
void pwm_put(struct pwm_device *pwm);
struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id);
-struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np,
- const char *con_id);
struct pwm_device *devm_fwnode_pwm_get(struct device *dev,
struct fwnode_handle *fwnode,
const char *con_id);
@@ -482,6 +478,11 @@ static inline int pwmchip_remove(struct pwm_chip *chip)
return -EINVAL;
}
+static inline int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip)
+{
+ return -EINVAL;
+}
+
static inline struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
unsigned int index,
const char *label)
@@ -497,14 +498,6 @@ static inline struct pwm_device *pwm_get(struct device *dev,
return ERR_PTR(-ENODEV);
}
-static inline struct pwm_device *of_pwm_get(struct device *dev,
- struct device_node *np,
- const char *con_id)
-{
- might_sleep();
- return ERR_PTR(-ENODEV);
-}
-
static inline void pwm_put(struct pwm_device *pwm)
{
might_sleep();
@@ -517,14 +510,6 @@ static inline struct pwm_device *devm_pwm_get(struct device *dev,
return ERR_PTR(-ENODEV);
}
-static inline struct pwm_device *devm_of_pwm_get(struct device *dev,
- struct device_node *np,
- const char *con_id)
-{
- might_sleep();
- return ERR_PTR(-ENODEV);
-}
-
static inline struct pwm_device *
devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode,
const char *con_id)
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index a3fec2de512f..cd1973e6ac4b 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -229,6 +229,7 @@ enum pxa_ssp_type {
LPSS_SPT_SSP,
LPSS_BXT_SSP,
LPSS_CNL_SSP,
+ SSP_MAX
};
struct ssp_device {
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index d6e5a1feb947..f29aaaf2eb21 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -10,17 +10,9 @@
#ifdef __KERNEL__
-/* Set to 1 to use kernel-wide empty_zero_page */
-#define RAID6_USE_EMPTY_ZERO_PAGE 0
#include <linux/blkdev.h>
-/* We need a pre-zeroed page... if we don't want to use the kernel-provided
- one define it here */
-#if RAID6_USE_EMPTY_ZERO_PAGE
-# define raid6_empty_zero_page empty_zero_page
-#else
extern const char raid6_empty_zero_page[PAGE_SIZE];
-#endif
#else /* ! __KERNEL__ */
/* Used for testing in user space */
diff --git a/include/linux/random.h b/include/linux/random.h
index 3fec206487f6..4a2a1de423cd 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -6,7 +6,6 @@
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/list.h>
-#include <linux/once.h>
#include <uapi/linux/random.h>
@@ -17,16 +16,16 @@ void __init add_bootloader_randomness(const void *buf, size_t len);
void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value) __latent_entropy;
void add_interrupt_randomness(int irq) __latent_entropy;
-void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy);
+void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after);
-#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
static inline void add_latent_entropy(void)
{
+#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy));
-}
#else
-static inline void add_latent_entropy(void) { }
+ add_device_randomness(NULL, 0);
#endif
+}
#if IS_ENABLED(CONFIG_VMGENID)
void add_vmfork_randomness(const void *unique_vm_id, size_t len);
@@ -38,12 +37,10 @@ static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) {
#endif
void get_random_bytes(void *buf, size_t len);
+u8 get_random_u8(void);
+u16 get_random_u16(void);
u32 get_random_u32(void);
u64 get_random_u64(void);
-static inline unsigned int get_random_int(void)
-{
- return get_random_u32();
-}
static inline unsigned long get_random_long(void)
{
#if BITS_PER_LONG == 64
@@ -53,28 +50,76 @@ static inline unsigned long get_random_long(void)
#endif
}
+u32 __get_random_u32_below(u32 ceil);
+
/*
- * On 64-bit architectures, protect against non-terminated C string overflows
- * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ * Returns a random integer in the interval [0, ceil), with uniform
+ * distribution, suitable for all uses. Fastest when ceil is a constant, but
+ * still fast for variable ceil as well.
*/
-#ifdef CONFIG_64BIT
-# ifdef __LITTLE_ENDIAN
-# define CANARY_MASK 0xffffffffffffff00UL
-# else /* big endian, 64 bits: */
-# define CANARY_MASK 0x00ffffffffffffffUL
-# endif
-#else /* 32 bits: */
-# define CANARY_MASK 0xffffffffUL
-#endif
+static inline u32 get_random_u32_below(u32 ceil)
+{
+ if (!__builtin_constant_p(ceil))
+ return __get_random_u32_below(ceil);
+
+ /*
+ * For the fast path, below, all operations on ceil are precomputed by
+ * the compiler, so this incurs no overhead for checking pow2, doing
+ * divisions, or branching based on integer size. The resultant
+ * algorithm does traditional reciprocal multiplication (typically
+ * optimized by the compiler into shifts and adds), rejecting samples
+ * whose lower half would indicate a range indivisible by ceil.
+ */
+ BUILD_BUG_ON_MSG(!ceil, "get_random_u32_below() must take ceil > 0");
+ if (ceil <= 1)
+ return 0;
+ for (;;) {
+ if (ceil <= 1U << 8) {
+ u32 mult = ceil * get_random_u8();
+ if (likely(is_power_of_2(ceil) || (u8)mult >= (1U << 8) % ceil))
+ return mult >> 8;
+ } else if (ceil <= 1U << 16) {
+ u32 mult = ceil * get_random_u16();
+ if (likely(is_power_of_2(ceil) || (u16)mult >= (1U << 16) % ceil))
+ return mult >> 16;
+ } else {
+ u64 mult = (u64)ceil * get_random_u32();
+ if (likely(is_power_of_2(ceil) || (u32)mult >= -ceil % ceil))
+ return mult >> 32;
+ }
+ }
+}
-static inline unsigned long get_random_canary(void)
+/*
+ * Returns a random integer in the interval (floor, U32_MAX], with uniform
+ * distribution, suitable for all uses. Fastest when floor is a constant, but
+ * still fast for variable floor as well.
+ */
+static inline u32 get_random_u32_above(u32 floor)
{
- return get_random_long() & CANARY_MASK;
+ BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && floor == U32_MAX,
+ "get_random_u32_above() must take floor < U32_MAX");
+ return floor + 1 + get_random_u32_below(U32_MAX - floor);
}
-int __init random_init(const char *command_line);
+/*
+ * Returns a random integer in the interval [floor, ceil], with uniform
+ * distribution, suitable for all uses. Fastest when floor and ceil are
+ * constant, but still fast for variable floor and ceil as well.
+ */
+static inline u32 get_random_u32_inclusive(u32 floor, u32 ceil)
+{
+ BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && __builtin_constant_p(ceil) &&
+ (floor > ceil || ceil - floor == U32_MAX),
+ "get_random_u32_inclusive() must take floor <= ceil");
+ return floor + get_random_u32_below(ceil - floor + 1);
+}
+
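
Two hedged usage sketches for the new bounded helpers; the foo_* wrappers and the 200-300 ms window are made up, the get_random_u32_below()/get_random_u32_inclusive() calls are the ones defined above.

static u32 foo_pick_index(void)
{
	/* Uniform index into a 16-entry table; because the ceiling is a
	 * constant, the reciprocal-multiplication fast path above is fully
	 * precomputed by the compiler. */
	return get_random_u32_below(16);
}

static unsigned long foo_pick_timeout(void)
{
	/* Jitter a retransmit timeout uniformly within [200ms, 300ms]. */
	return msecs_to_jiffies(get_random_u32_inclusive(200, 300));
}
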
+void __init random_init_early(const char *command_line);
+void __init random_init(void);
bool rng_is_initialized(void);
int wait_for_random_bytes(void);
+int execute_with_initialized_rng(struct notifier_block *nb);
/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
* Returns the result of the call to wait_for_random_bytes. */
@@ -93,9 +138,10 @@ static inline int get_random_bytes_wait(void *buf, size_t nbytes)
*out = get_random_ ## name(); \
return 0; \
}
+declare_get_random_var_wait(u8, u8)
+declare_get_random_var_wait(u16, u16)
declare_get_random_var_wait(u32, u32)
declare_get_random_var_wait(u64, u32)
-declare_get_random_var_wait(int, unsigned int)
declare_get_random_var_wait(long, unsigned long)
#undef declare_get_random_var
@@ -108,26 +154,6 @@ declare_get_random_var_wait(long, unsigned long)
#include <asm/archrandom.h>
-/*
- * Called from the boot CPU during startup; not valid to call once
- * secondary CPUs are up and preemption is possible.
- */
-#ifndef arch_get_random_seed_longs_early
-static inline size_t __init arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs)
-{
- WARN_ON(system_state != SYSTEM_BOOTING);
- return arch_get_random_seed_longs(v, max_longs);
-}
-#endif
-
-#ifndef arch_get_random_longs_early
-static inline bool __init arch_get_random_longs_early(unsigned long *v, size_t max_longs)
-{
- WARN_ON(system_state != SYSTEM_BOOTING);
- return arch_get_random_longs(v, max_longs);
-}
-#endif
-
#ifdef CONFIG_SMP
int random_prepare_cpu(unsigned int cpu);
int random_online_cpu(unsigned int cpu);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f527f27e6438..03abf883a281 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -42,7 +42,31 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
void rcu_barrier_tasks(void);
void rcu_barrier_tasks_rude(void);
void synchronize_rcu(void);
+
+struct rcu_gp_oldstate;
unsigned long get_completed_synchronize_rcu(void);
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
+
+// Maximum number of unsigned long values corresponding to
+// not-yet-completed RCU grace periods.
+#define NUM_ACTIVE_RCU_POLL_OLDSTATE 2
+
+/**
+ * same_state_synchronize_rcu - Are two old-state values identical?
+ * @oldstate1: First old-state value.
+ * @oldstate2: Second old-state value.
+ *
+ * The two old-state values must have been obtained from either
+ * get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or
+ * get_completed_synchronize_rcu(). Returns @true if the two values are
+ * identical and @false otherwise. This allows structures whose lifetimes
+ * are tracked by old-state values to push these values to a list header,
+ * allowing those structures to be slightly smaller.
+ */
+static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned long oldstate2)
+{
+ return oldstate1 == oldstate2;
+}
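
A hedged sketch of the intended pattern: a single unsigned long cookie tracks whether a grace period has elapsed, and same_state_synchronize_rcu() lets freshly initialized objects carry the always-completed cookie. struct foo and the reuse policy are hypothetical; the RCU calls are the polled grace-period API declared here and in rcutiny.h/rcutree.h.

struct foo {
	unsigned long gp_cookie;	/* from start_poll_synchronize_rcu() */
	/* ... payload ... */
};

static void foo_init(struct foo *f)
{
	/* Compares equal to any completed grace period, so a brand-new
	 * object is immediately reusable. */
	f->gp_cookie = get_completed_synchronize_rcu();
}

static void foo_retire(struct foo *f)
{
	f->gp_cookie = start_poll_synchronize_rcu();
}

static bool foo_can_reuse(struct foo *f)
{
	/* The explicit same_state check only illustrates the helper;
	 * poll_state_synchronize_rcu() also returns true for it. */
	return same_state_synchronize_rcu(f->gp_cookie,
					  get_completed_synchronize_rcu()) ||
	       poll_state_synchronize_rcu(f->gp_cookie);
}
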
#ifdef CONFIG_PREEMPT_RCU
@@ -84,6 +108,15 @@ static inline int rcu_preempt_depth(void)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+#ifdef CONFIG_RCU_LAZY
+void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func);
+#else
+static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
+{
+ call_rcu(head, func);
+}
+#endif
+
/* Internal to kernel */
void rcu_init(void);
extern int rcu_scheduler_active;
@@ -217,6 +250,18 @@ static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
/**
+ * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period?
+ *
+ * As an accident of implementation, an RCU Tasks Trace grace period also
+ * acts as an RCU grace period. However, this could change at any time.
+ * Code relying on this accident must call this function to verify that
+ * this accident is still happening.
+ *
+ * You have been warned!
+ */
+static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
+
+/**
* cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
*
* This macro resembles cond_resched(), except that it is defined to
@@ -316,6 +361,11 @@ static inline int rcu_read_lock_any_held(void)
return !preemptible();
}
+static inline int debug_lockdep_rcu_enabled(void)
+{
+ return 0;
+}
+
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_PROVE_RCU
@@ -496,13 +546,21 @@ do { \
* against NULL. Although rcu_access_pointer() may also be used in cases
* where update-side locks prevent the value of the pointer from changing,
* you should instead use rcu_dereference_protected() for this use case.
+ * Within an RCU read-side critical section, there is little reason to
+ * use rcu_access_pointer().
+ *
+ * It is usually best to test the rcu_access_pointer() return value
+ * directly in order to avoid accidental dereferences being introduced
+ * by later inattentive changes. In other words, assigning the
+ * rcu_access_pointer() return value to a local variable results in an
+ * accident waiting to happen.
*
* It is also permissible to use rcu_access_pointer() when read-side
- * access to the pointer was removed at least one grace period ago, as
- * is the case in the context of the RCU callback that is freeing up
- * the data, or after a synchronize_rcu() returns. This can be useful
- * when tearing down multi-linked structures after a grace period
- * has elapsed.
+ * access to the pointer was removed at least one grace period ago, as is
+ * the case in the context of the RCU callback that is freeing up the data,
+ * or after a synchronize_rcu() returns. This can be useful when tearing
+ * down multi-linked structures after a grace period has elapsed. However,
+ * rcu_dereference_protected() is normally preferred for this use case.
*/
#define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 62815c0a2dce..68f9070aa111 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -14,25 +14,75 @@
#include <asm/param.h> /* for HZ */
+struct rcu_gp_oldstate {
+ unsigned long rgos_norm;
+};
+
+// Maximum number of rcu_gp_oldstate values corresponding to
+// not-yet-completed RCU grace periods.
+#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 2
+
+/*
+ * Are the two oldstate values the same? See the Tree RCU version for
+ * docbook header.
+ */
+static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1,
+ struct rcu_gp_oldstate *rgosp2)
+{
+ return rgosp1->rgos_norm == rgosp2->rgos_norm;
+}
+
unsigned long get_state_synchronize_rcu(void);
+
+static inline void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ rgosp->rgos_norm = get_state_synchronize_rcu();
+}
+
unsigned long start_poll_synchronize_rcu(void);
+
+static inline void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ rgosp->rgos_norm = start_poll_synchronize_rcu();
+}
+
bool poll_state_synchronize_rcu(unsigned long oldstate);
+static inline bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ return poll_state_synchronize_rcu(rgosp->rgos_norm);
+}
+
static inline void cond_synchronize_rcu(unsigned long oldstate)
{
might_sleep();
}
+static inline void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ cond_synchronize_rcu(rgosp->rgos_norm);
+}
+
static inline unsigned long start_poll_synchronize_rcu_expedited(void)
{
return start_poll_synchronize_rcu();
}
+static inline void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp)
+{
+ rgosp->rgos_norm = start_poll_synchronize_rcu_expedited();
+}
+
static inline void cond_synchronize_rcu_expedited(unsigned long oldstate)
{
cond_synchronize_rcu(oldstate);
}
+static inline void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp)
+{
+ cond_synchronize_rcu_expedited(rgosp->rgos_norm);
+}
+
extern void rcu_barrier(void);
static inline void synchronize_rcu_expedited(void)
@@ -92,23 +142,17 @@ static inline int rcu_needs_cpu(void)
* Take advantage of the fact that there is only one CPU, which
* allows us to ignore virtualization-based context switches.
*/
-static inline void rcu_virt_note_context_switch(int cpu) { }
+static inline void rcu_virt_note_context_switch(void) { }
static inline void rcu_cpu_stall_reset(void) { }
static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; }
static inline void rcu_irq_exit_check_preempt(void) { }
-#define rcu_is_idle_cpu(cpu) \
- (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq())
static inline void exit_rcu(void) { }
static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return false;
}
static inline void rcu_preempt_deferred_qs(struct task_struct *t) { }
-#ifdef CONFIG_SRCU
void rcu_scheduler_starting(void);
-#else /* #ifndef CONFIG_SRCU */
-static inline void rcu_scheduler_starting(void) { }
-#endif /* #else #ifndef CONFIG_SRCU */
static inline void rcu_end_inkernel_boot(void) { }
static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 47eaa4cb0df7..4003bf6cfa1c 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -27,7 +27,7 @@ void rcu_cpu_stall_reset(void);
* wrapper around rcu_note_context_switch(), which allows TINY_RCU
* to save a few bytes. The caller must have disabled interrupts.
*/
-static inline void rcu_virt_note_context_switch(int cpu)
+static inline void rcu_virt_note_context_switch(void)
{
rcu_note_context_switch(false);
}
@@ -40,14 +40,52 @@ bool rcu_eqs_special_set(int cpu);
void rcu_momentary_dyntick_idle(void);
void kfree_rcu_scheduler_running(void);
bool rcu_gp_might_be_stalled(void);
+
+struct rcu_gp_oldstate {
+ unsigned long rgos_norm;
+ unsigned long rgos_exp;
+};
+
+// Maximum number of rcu_gp_oldstate values corresponding to
+// not-yet-completed RCU grace periods.
+#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 4
+
+/**
+ * same_state_synchronize_rcu_full - Are two old-state values identical?
+ * @rgosp1: First old-state value.
+ * @rgosp2: Second old-state value.
+ *
+ * The two old-state values must have been obtained from either
+ * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(),
+ * or get_completed_synchronize_rcu_full(). Returns @true if the two
+ * values are identical and @false otherwise. This allows structures
+ * whose lifetimes are tracked by old-state values to push these values
+ * to a list header, allowing those structures to be slightly smaller.
+ *
+ * Note that equality is judged on a bitwise basis, so that an
+ * @rcu_gp_oldstate structure with an already-completed state in one field
+ * will compare not-equal to a structure with an already-completed state
+ * in the other field. After all, the @rcu_gp_oldstate structure is opaque
+ * so how did such a situation come to pass in the first place?
+ */
+static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1,
+ struct rcu_gp_oldstate *rgosp2)
+{
+ return rgosp1->rgos_norm == rgosp2->rgos_norm && rgosp1->rgos_exp == rgosp2->rgos_exp;
+}
+
unsigned long start_poll_synchronize_rcu_expedited(void);
+void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp);
void cond_synchronize_rcu_expedited(unsigned long oldstate);
+void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp);
unsigned long get_state_synchronize_rcu(void);
+void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
unsigned long start_poll_synchronize_rcu(void);
+void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
bool poll_state_synchronize_rcu(unsigned long oldstate);
+bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
void cond_synchronize_rcu(unsigned long oldstate);
-
-bool rcu_is_idle_cpu(int cpu);
+void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
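+
A hedged sketch of the _full variants, which follow the same polled pattern but carry a struct rcu_gp_oldstate holding both the normal and expedited state rather than a single cookie; the foo_full structure is hypothetical, the calls are declared just above.

struct foo_full {
	struct rcu_gp_oldstate gp;
	/* ... payload ... */
};

static void foo_full_retire(struct foo_full *f)
{
	start_poll_synchronize_rcu_full(&f->gp);
}

static bool foo_full_can_reuse(struct foo_full *f)
{
	return poll_state_synchronize_rcu_full(&f->gp);
}
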
#ifdef CONFIG_PROVE_RCU
void rcu_irq_exit_check_preempt(void);
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index e5d9ef886179..2b6bb593be5b 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -106,6 +106,14 @@ enum sys_off_mode {
SYS_OFF_MODE_POWER_OFF,
/**
+ * @SYS_OFF_MODE_RESTART_PREPARE:
+ *
+ * Handlers prepare system to be restarted. Handlers are
+ * allowed to sleep.
+ */
+ SYS_OFF_MODE_RESTART_PREPARE,
+
+ /**
* @SYS_OFF_MODE_RESTART:
*
* Handlers restart system. Handlers are disallowed to sleep.
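
A hedged sketch of a driver registering a handler for the new SYS_OFF_MODE_RESTART_PREPARE mode; the foo_* names are hypothetical and the registration assumes the register_sys_off_handler()/sys_off_data API already provided by this header.

static int foo_restart_prepare(struct sys_off_data *data)
{
	struct foo_device *foo = data->cb_data;

	/* May sleep here: quiesce firmware before the actual restart
	 * handlers (which must not sleep) run. */
	foo_quiesce(foo);
	return NOTIFY_DONE;
}

static int foo_probe(struct device *dev, struct foo_device *foo)
{
	return devm_register_sys_off_handler(dev, SYS_OFF_MODE_RESTART_PREPARE,
					     SYS_OFF_PRIO_DEFAULT,
					     foo_restart_prepare, foo);
}
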
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 7cf2157134ac..a3bc695bcca0 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -24,6 +24,7 @@ struct module;
struct clk;
struct device;
struct device_node;
+struct fsi_device;
struct i2c_client;
struct i3c_device;
struct irq_domain;
@@ -311,6 +312,8 @@ typedef void (*regmap_unlock)(void *);
* This field is a duplicate of a similar field in
* 'struct regmap_bus' and serves the exact same purpose.
* Use it only for "no-bus" cases.
+ * @io_port: Support IO port accessors. Makes sense only when MMIO vs. IO port
+ * access can be distinguished.
* @max_register: Optional, specifies the maximum valid register address.
* @wr_table: Optional, points to a struct regmap_access_table specifying
* valid ranges for write access.
@@ -399,6 +402,7 @@ struct regmap_config {
size_t max_raw_write;
bool fast_io;
+ bool io_port;
unsigned int max_register;
const struct regmap_access_table *wr_table;
@@ -489,8 +493,12 @@ typedef int (*regmap_hw_read)(void *context,
void *val_buf, size_t val_size);
typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg,
unsigned int *val);
+typedef int (*regmap_hw_reg_noinc_read)(void *context, unsigned int reg,
+ void *val, size_t val_count);
typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg,
unsigned int val);
+typedef int (*regmap_hw_reg_noinc_write)(void *context, unsigned int reg,
+ const void *val, size_t val_count);
typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg,
unsigned int mask, unsigned int val);
typedef struct regmap_async *(*regmap_hw_async_alloc)(void);
@@ -511,6 +519,8 @@ typedef void (*regmap_hw_free_context)(void *context);
* must serialise with respect to non-async I/O.
* @reg_write: Write a single register value to the given register address. This
* write operation has to complete when returning from the function.
+ * @reg_noinc_write: Write multiple register values to the same register. This
+ * write operation has to complete when returning from the function.
* @reg_update_bits: Update bits operation to be used against volatile
* registers, intended for devices supporting some mechanism
* for setting clearing bits without having to
@@ -538,9 +548,11 @@ struct regmap_bus {
regmap_hw_gather_write gather_write;
regmap_hw_async_write async_write;
regmap_hw_reg_write reg_write;
+ regmap_hw_reg_noinc_write reg_noinc_write;
regmap_hw_reg_update_bits reg_update_bits;
regmap_hw_read read;
regmap_hw_reg_read reg_read;
+ regmap_hw_reg_noinc_read reg_noinc_read;
regmap_hw_free_context free_context;
regmap_hw_async_alloc async_alloc;
u8 read_flag_mask;
@@ -617,6 +629,10 @@ struct regmap *__regmap_init_spi_avmm(struct spi_device *spi,
const struct regmap_config *config,
struct lock_class_key *lock_key,
const char *lock_name);
+struct regmap *__regmap_init_fsi(struct fsi_device *fsi_dev,
+ const struct regmap_config *config,
+ struct lock_class_key *lock_key,
+ const char *lock_name);
struct regmap *__devm_regmap_init(struct device *dev,
const struct regmap_bus *bus,
@@ -682,6 +698,11 @@ struct regmap *__devm_regmap_init_spi_avmm(struct spi_device *spi,
const struct regmap_config *config,
struct lock_class_key *lock_key,
const char *lock_name);
+struct regmap *__devm_regmap_init_fsi(struct fsi_device *fsi_dev,
+ const struct regmap_config *config,
+ struct lock_class_key *lock_key,
+ const char *lock_name);
+
/*
* Wrapper for regmap_init macros to include a unique lockdep key and name
* for each call. No-op if CONFIG_LOCKDEP is not set.
@@ -909,6 +930,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
spi, config)
/**
+ * regmap_init_fsi() - Initialise register map
+ *
+ * @fsi_dev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+#define regmap_init_fsi(fsi_dev, config) \
+ __regmap_lockdep_wrapper(__regmap_init_fsi, #config, fsi_dev, \
+ config)
+
+/**
* devm_regmap_init() - Initialise managed register map
*
* @dev: Device that will be interacted with
@@ -1137,6 +1171,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
__regmap_lockdep_wrapper(__devm_regmap_init_spi_avmm, #config, \
spi, config)
+/**
+ * devm_regmap_init_fsi() - Initialise managed register map
+ *
+ * @fsi_dev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap. The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_fsi(fsi_dev, config) \
+ __regmap_lockdep_wrapper(__devm_regmap_init_fsi, #config, \
+ fsi_dev, config)
+
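
A hedged sketch of an FSI client creating a managed register map with the new helper; the foo_* names and register geometry are hypothetical, devm_regmap_init_fsi() is the wrapper defined just above.

static const struct regmap_config foo_fsi_regmap_config = {
	.reg_bits = 32,
	.val_bits = 32,
	.reg_stride = 4,
	.max_register = 0x0fff,		/* hypothetical register range */
};

static int foo_fsi_setup(struct fsi_device *fsi_dev)
{
	struct regmap *map;
	unsigned int id;

	map = devm_regmap_init_fsi(fsi_dev, &foo_fsi_regmap_config);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/* The map is now usable with the normal regmap accessors. */
	return regmap_read(map, 0x0, &id);
}
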
int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk);
void regmap_mmio_detach_clk(struct regmap *map);
void regmap_exit(struct regmap *map);
@@ -1208,6 +1256,7 @@ static inline int regmap_write_bits(struct regmap *map, unsigned int reg,
int regmap_get_val_bytes(struct regmap *map);
int regmap_get_max_register(struct regmap *map);
int regmap_get_reg_stride(struct regmap *map);
+bool regmap_might_sleep(struct regmap *map);
int regmap_async_complete(struct regmap *map);
bool regmap_can_raw_write(struct regmap *map);
size_t regmap_get_raw_read_max(struct regmap *map);
@@ -1531,6 +1580,8 @@ struct regmap_irq_chip_data;
* before regmap_irq_handler process the interrupts.
* @handle_post_irq: Driver specific callback to handle interrupt from device
* after handling the interrupts in regmap_irq_handler().
+ * @handle_mask_sync: Callback used to handle IRQ mask syncs. The index will be
+ * in the range [0, num_regs)
* @set_type_virt: Driver specific callback to extend regmap_irq_set_type()
* and configure virt regs. Deprecated, use @set_type_config
* callback and config registers instead.
@@ -1592,6 +1643,9 @@ struct regmap_irq_chip {
int (*handle_pre_irq)(void *irq_drv_data);
int (*handle_post_irq)(void *irq_drv_data);
+ int (*handle_mask_sync)(struct regmap *map, int index,
+ unsigned int mask_buf_def,
+ unsigned int mask_buf, void *irq_drv_data);
int (*set_type_virt)(unsigned int **buf, unsigned int type,
unsigned long hwirq, int reg);
int (*set_type_config)(unsigned int **buf, unsigned int type,
@@ -1894,6 +1948,12 @@ static inline int regmap_get_reg_stride(struct regmap *map)
return -EINVAL;
}
+static inline bool regmap_might_sleep(struct regmap *map)
+{
+ WARN_ONCE(1, "regmap API is disabled");
+ return true;
+}
+
static inline int regcache_sync(struct regmap *map)
{
WARN_ONCE(1, "regmap API is disabled");
diff --git a/include/linux/regset.h b/include/linux/regset.h
index a00765f0e8cf..9061266dd8de 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -275,15 +275,15 @@ static inline int user_regset_copyin(unsigned int *pos, unsigned int *count,
return 0;
}
-static inline int user_regset_copyin_ignore(unsigned int *pos,
- unsigned int *count,
- const void **kbuf,
- const void __user **ubuf,
- const int start_pos,
- const int end_pos)
+static inline void user_regset_copyin_ignore(unsigned int *pos,
+ unsigned int *count,
+ const void **kbuf,
+ const void __user **ubuf,
+ const int start_pos,
+ const int end_pos)
{
if (*count == 0)
- return 0;
+ return;
BUG_ON(*pos < start_pos);
if (end_pos < 0 || *pos < end_pos) {
unsigned int copy = (end_pos < 0 ? *count
@@ -295,7 +295,6 @@ static inline int user_regset_copyin_ignore(unsigned int *pos,
*pos += copy;
*count -= copy;
}
- return 0;
}
extern int regset_get(struct task_struct *target,
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index bc6cda706d1f..39b666b40ea6 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -207,6 +207,8 @@ struct regulator *__must_check regulator_get_optional(struct device *dev,
const char *id);
struct regulator *__must_check devm_regulator_get_optional(struct device *dev,
const char *id);
+int devm_regulator_get_enable(struct device *dev, const char *id);
+int devm_regulator_get_enable_optional(struct device *dev, const char *id);
void regulator_put(struct regulator *regulator);
void devm_regulator_put(struct regulator *regulator);
@@ -242,14 +244,21 @@ int regulator_disable_deferred(struct regulator *regulator, int ms);
int __must_check regulator_bulk_get(struct device *dev, int num_consumers,
struct regulator_bulk_data *consumers);
+int __must_check of_regulator_bulk_get_all(struct device *dev, struct device_node *np,
+ struct regulator_bulk_data **consumers);
int __must_check devm_regulator_bulk_get(struct device *dev, int num_consumers,
struct regulator_bulk_data *consumers);
+void devm_regulator_bulk_put(struct regulator_bulk_data *consumers);
+int __must_check devm_regulator_bulk_get_exclusive(struct device *dev, int num_consumers,
+ struct regulator_bulk_data *consumers);
int __must_check devm_regulator_bulk_get_const(
struct device *dev, int num_consumers,
const struct regulator_bulk_data *in_consumers,
struct regulator_bulk_data **out_consumers);
int __must_check regulator_bulk_enable(int num_consumers,
struct regulator_bulk_data *consumers);
+int devm_regulator_bulk_get_enable(struct device *dev, int num_consumers,
+ const char * const *id);
int regulator_bulk_disable(int num_consumers,
struct regulator_bulk_data *consumers);
int regulator_bulk_force_disable(int num_consumers,
@@ -354,6 +363,17 @@ devm_regulator_get_exclusive(struct device *dev, const char *id)
return ERR_PTR(-ENODEV);
}
+static inline int devm_regulator_get_enable(struct device *dev, const char *id)
+{
+ return -ENODEV;
+}
+
+static inline int devm_regulator_get_enable_optional(struct device *dev,
+ const char *id)
+{
+ return -ENODEV;
+}
+
static inline struct regulator *__must_check
regulator_get_optional(struct device *dev, const char *id)
{
@@ -375,6 +395,10 @@ static inline void devm_regulator_put(struct regulator *regulator)
{
}
+static inline void devm_regulator_bulk_put(struct regulator_bulk_data *consumers)
+{
+}
+
static inline int regulator_register_supply_alias(struct device *dev,
const char *id,
struct device *alias_dev,
@@ -459,12 +483,25 @@ static inline int devm_regulator_bulk_get(struct device *dev, int num_consumers,
return 0;
}
+static inline int of_regulator_bulk_get_all(struct device *dev, struct device_node *np,
+ struct regulator_bulk_data **consumers)
+{
+ return 0;
+}
+
static inline int regulator_bulk_enable(int num_consumers,
struct regulator_bulk_data *consumers)
{
return 0;
}
+static inline int devm_regulator_bulk_get_enable(struct device *dev,
+ int num_consumers,
+ const char * const *id)
+{
+ return 0;
+}
+
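
As a usage sketch for the new get-and-enable helpers, a hypothetical consumer probe; the supply names are made up, the calls are the ones declared above. No struct regulator handle is returned, so the supplies stay enabled until devres teardown at unbind.

static int foo_probe(struct device *dev)
{
	static const char * const foo_supplies[] = { "vdda", "vddio" };
	int ret;

	ret = devm_regulator_get_enable(dev, "vdd");
	if (ret)
		return ret;

	return devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(foo_supplies),
					      foo_supplies);
}
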
static inline int regulator_bulk_disable(int num_consumers,
struct regulator_bulk_data *consumers)
{
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index f9a7461e72b8..d3b4a3d4514a 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -687,7 +687,8 @@ static inline int regulator_err2notif(int err)
struct regulator_dev *
-regulator_register(const struct regulator_desc *regulator_desc,
+regulator_register(struct device *dev,
+ const struct regulator_desc *regulator_desc,
const struct regulator_config *config);
struct regulator_dev *
devm_regulator_register(struct device *dev,
diff --git a/include/linux/regulator/gpio-regulator.h b/include/linux/regulator/gpio-regulator.h
index fdeb312cdabd..c223e50ff9f7 100644
--- a/include/linux/regulator/gpio-regulator.h
+++ b/include/linux/regulator/gpio-regulator.h
@@ -42,6 +42,7 @@ struct gpio_regulator_state {
/**
* struct gpio_regulator_config - config structure
* @supply_name: Name of the regulator supply
+ * @input_supply: Name of the input regulator supply
* @enabled_at_boot: Whether regulator has been enabled at
* boot or not. 1 = Yes, 0 = No
* This is used to keep the regulator at
@@ -62,6 +63,7 @@ struct gpio_regulator_state {
*/
struct gpio_regulator_config {
const char *supply_name;
+ const char *input_supply;
unsigned enabled_at_boot:1;
unsigned startup_delay;
diff --git a/include/linux/regulator/mt6331-regulator.h b/include/linux/regulator/mt6331-regulator.h
new file mode 100644
index 000000000000..2801a9879c14
--- /dev/null
+++ b/include/linux/regulator/mt6331-regulator.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Collabora Ltd.
+ * Author: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ */
+
+#ifndef __LINUX_REGULATOR_MT6331_H
+#define __LINUX_REGULATOR_MT6331_H
+
+enum {
+ /* BUCK */
+ MT6331_ID_VDVFS11 = 0,
+ MT6331_ID_VDVFS12,
+ MT6331_ID_VDVFS13,
+ MT6331_ID_VDVFS14,
+ MT6331_ID_VCORE2,
+ MT6331_ID_VIO18,
+ /* LDO */
+ MT6331_ID_VTCXO1,
+ MT6331_ID_VTCXO2,
+ MT6331_ID_AVDD32_AUD,
+ MT6331_ID_VAUXA32,
+ MT6331_ID_VCAMA,
+ MT6331_ID_VIO28,
+ MT6331_ID_VCAM_AF,
+ MT6331_ID_VMC,
+ MT6331_ID_VMCH,
+ MT6331_ID_VEMC33,
+ MT6331_ID_VGP1,
+ MT6331_ID_VSIM1,
+ MT6331_ID_VSIM2,
+ MT6331_ID_VMIPI,
+ MT6331_ID_VIBR,
+ MT6331_ID_VGP4,
+ MT6331_ID_VCAMD,
+ MT6331_ID_VUSB10,
+ MT6331_ID_VCAM_IO,
+ MT6331_ID_VSRAM_DVFS1,
+ MT6331_ID_VGP2,
+ MT6331_ID_VGP3,
+ MT6331_ID_VRTC,
+ MT6331_ID_VDIG18,
+ MT6331_ID_VREG_MAX
+};
+
+#endif /* __LINUX_REGULATOR_MT6331_H */
diff --git a/include/linux/regulator/mt6332-regulator.h b/include/linux/regulator/mt6332-regulator.h
new file mode 100644
index 000000000000..af5e3ed31029
--- /dev/null
+++ b/include/linux/regulator/mt6332-regulator.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Collabora Ltd.
+ * Author: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ */
+
+#ifndef __LINUX_REGULATOR_MT6332_H
+#define __LINUX_REGULATOR_MT6332_H
+
+enum {
+ /* BUCK */
+ MT6332_ID_VDRAM = 0,
+ MT6332_ID_VDVFS2,
+ MT6332_ID_VPA,
+ MT6332_ID_VRF1,
+ MT6332_ID_VRF2,
+ MT6332_ID_VSBST,
+ /* LDO */
+ MT6332_ID_VAUXB32,
+ MT6332_ID_VBIF28,
+ MT6332_ID_VDIG18,
+ MT6332_ID_VSRAM_DVFS2,
+ MT6332_ID_VUSB33,
+ MT6332_ID_VREG_MAX
+};
+
+#endif /* __LINUX_REGULATOR_MT6332_H */
diff --git a/include/linux/regulator/mt6357-regulator.h b/include/linux/regulator/mt6357-regulator.h
new file mode 100644
index 000000000000..238b1ee77ea6
--- /dev/null
+++ b/include/linux/regulator/mt6357-regulator.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ */
+
+#ifndef __LINUX_REGULATOR_MT6357_H
+#define __LINUX_REGULATOR_MT6357_H
+
+enum {
+ /* Bucks */
+ MT6357_ID_VCORE,
+ MT6357_ID_VMODEM,
+ MT6357_ID_VPA,
+ MT6357_ID_VPROC,
+ MT6357_ID_VS1,
+
+ /* LDOs */
+ MT6357_ID_VAUX18,
+ MT6357_ID_VAUD28,
+ MT6357_ID_VCAMA,
+ MT6357_ID_VCAMD,
+ MT6357_ID_VCAMIO,
+ MT6357_ID_VCN18,
+ MT6357_ID_VCN28,
+ MT6357_ID_VCN33_BT,
+ MT6357_ID_VCN33_WIFI,
+ MT6357_ID_VDRAM,
+ MT6357_ID_VEFUSE,
+ MT6357_ID_VEMC,
+ MT6357_ID_VFE28,
+ MT6357_ID_VIBR,
+ MT6357_ID_VIO18,
+ MT6357_ID_VIO28,
+ MT6357_ID_VLDO28,
+ MT6357_ID_VMC,
+ MT6357_ID_VMCH,
+ MT6357_ID_VRF12,
+ MT6357_ID_VRF18,
+ MT6357_ID_VSIM1,
+ MT6357_ID_VSIM2,
+ MT6357_ID_VSRAM_OTHERS,
+ MT6357_ID_VSRAM_PROC,
+ MT6357_ID_VUSB33,
+ MT6357_ID_VXO22,
+
+ MT6357_ID_RG_MAX,
+};
+
+#define MT6357_MAX_REGULATOR MT6357_ID_RG_MAX
+
+#endif /* __LINUX_REGULATOR_MT6357_H */
diff --git a/include/linux/regulator/userspace-consumer.h b/include/linux/regulator/userspace-consumer.h
index b5dba0628951..2249ee697f8b 100644
--- a/include/linux/regulator/userspace-consumer.h
+++ b/include/linux/regulator/userspace-consumer.h
@@ -21,6 +21,7 @@ struct regulator_userspace_consumer_data {
struct regulator_bulk_data *supplies;
bool init_on;
+ bool no_autoswitch;
};
#endif /* __REGULATOR_PLATFORM_CONSUMER_H_ */
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index aea79c77db0f..fe8978eb69f1 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -490,6 +490,20 @@ struct rproc_dump_segment {
};
/**
+ * enum rproc_features - features supported
+ *
+ * @RPROC_FEAT_ATTACH_ON_RECOVERY: The remote processor does not need help
+ * from Linux to recover, such as firmware
+ * loading. Linux just needs to attach after
+ * recovery.
+ */
+
+enum rproc_features {
+ RPROC_FEAT_ATTACH_ON_RECOVERY,
+ RPROC_MAX_FEATURES,
+};
+
+/**
* struct rproc - represents a physical remote processor device
* @node: list node of this rproc object
* @domain: iommu domain
@@ -530,6 +544,7 @@ struct rproc_dump_segment {
* @elf_machine: firmware ELF machine
* @cdev: character device of the rproc
* @cdev_put_on_release: flag to indicate if remoteproc should be shutdown on @char_dev release
+ * @features: indicate remoteproc features
*/
struct rproc {
struct list_head node;
@@ -570,6 +585,7 @@ struct rproc {
u16 elf_machine;
struct cdev cdev;
bool cdev_put_on_release;
+ DECLARE_BITMAP(features, RPROC_MAX_FEATURES);
};
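
A hedged sketch of a platform driver opting in to the new feature; foo_rproc_ops, the firmware name and struct foo_priv are hypothetical, while the @features bitmap and RPROC_FEAT_ATTACH_ON_RECOVERY come from this header.

static int foo_rproc_probe(struct platform_device *pdev)
{
	struct rproc *rproc;

	rproc = rproc_alloc(&pdev->dev, "foo-remoteproc", &foo_rproc_ops,
			    "foo-fw.elf", sizeof(struct foo_priv));
	if (!rproc)
		return -ENOMEM;

	/* The core recovers on its own after a crash; ask the framework to
	 * simply re-attach instead of reloading firmware during recovery. */
	set_bit(RPROC_FEAT_ATTACH_ON_RECOVERY, rproc->features);

	return rproc_add(rproc);
}
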
/**
@@ -616,9 +632,8 @@ struct rproc_vring {
/**
* struct rproc_vdev - remoteproc state for a supported virtio device
- * @refcount: reference counter for the vdev and vring allocations
* @subdev: handle for registering the vdev as a rproc subdevice
- * @dev: device struct used for reference count semantics
+ * @pdev: remoteproc virtio platform device
* @id: virtio device id (as in virtio_ids.h)
* @node: list node
* @rproc: the rproc handle
@@ -627,10 +642,9 @@ struct rproc_vring {
* @index: vdev position versus other vdev declared in resource table
*/
struct rproc_vdev {
- struct kref refcount;
struct rproc_subdev subdev;
- struct device dev;
+ struct platform_device *pdev;
unsigned int id;
struct list_head node;
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 21deb5212bbd..0cee154abc9f 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -15,6 +15,9 @@ int proc_resctrl_show(struct seq_file *m,
#endif
+/* max value for struct rdt_domain's mbps_val */
+#define MBA_MAX_MBPS U32_MAX
+
/**
* enum resctrl_conf_type - The type of configuration.
* @CDP_NONE: No prioritisation, both code and data are controlled or monitored.
@@ -29,6 +32,16 @@ enum resctrl_conf_type {
#define CDP_NUM_TYPES (CDP_DATA + 1)
+/*
+ * Event IDs, the values match those used to program IA32_QM_EVTSEL before
+ * reading IA32_QM_CTR on RDT systems.
+ */
+enum resctrl_event_id {
+ QOS_L3_OCCUP_EVENT_ID = 0x01,
+ QOS_L3_MBM_TOTAL_EVENT_ID = 0x02,
+ QOS_L3_MBM_LOCAL_EVENT_ID = 0x03,
+};
+
/**
* struct resctrl_staged_config - parsed configuration to be applied
* @new_ctrl: new ctrl value to be loaded
@@ -53,6 +66,9 @@ struct resctrl_staged_config {
* @cqm_work_cpu: worker CPU for CQM h/w counters
* @plr: pseudo-locked region (if any) associated with domain
* @staged_config: parsed configuration to be applied
+ * @mbps_val: When mba_sc is enabled, this holds the array of user
+ * specified control values for mba_sc in MBps, indexed
+ * by closid
*/
struct rdt_domain {
struct list_head list;
@@ -67,16 +83,18 @@ struct rdt_domain {
int cqm_work_cpu;
struct pseudo_lock_region *plr;
struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
+ u32 *mbps_val;
};
/**
* struct resctrl_cache - Cache allocation related data
* @cbm_len: Length of the cache bit mask
- * @min_cbm_bits: Minimum number of consecutive bits to be set
+ * @min_cbm_bits: Minimum number of consecutive bits to be set.
+ * The value 0 means the architecture can support
+ * zero CBM.
* @shareable_bits: Bitmask of shareable resource with other
* executing entities
* @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid.
- * @arch_has_empty_bitmaps: True if the '0' bitmap is valid.
* @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
* level has CPU scope.
*/
@@ -85,7 +103,6 @@ struct resctrl_cache {
unsigned int min_cbm_bits;
unsigned int shareable_bits;
bool arch_has_sparse_bitmaps;
- bool arch_has_empty_bitmaps;
bool arch_has_per_cpu_cfg;
};
@@ -130,8 +147,6 @@ struct resctrl_schema;
/**
* struct rdt_resource - attributes of a resctrl resource
* @rid: The index of the resource
- * @alloc_enabled: Is allocation enabled on this machine
- * @mon_enabled: Is monitoring enabled for this feature
* @alloc_capable: Is allocation available on this machine
* @mon_capable: Is monitor feature available on this machine
* @num_rmid: Number of RMIDs available
@@ -150,8 +165,6 @@ struct resctrl_schema;
*/
struct rdt_resource {
int rid;
- bool alloc_enabled;
- bool mon_enabled;
bool alloc_capable;
bool mon_capable;
int num_rmid;
@@ -194,7 +207,50 @@ struct resctrl_schema {
/* The number of closid supported by this resource regardless of CDP */
u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
+
+/*
+ * Update the ctrl_val and apply this config right now.
+ * Must be called on one of the domain's CPUs.
+ */
+int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
+ u32 closid, enum resctrl_conf_type t, u32 cfg_val);
+
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, enum resctrl_conf_type type);
+int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
+
+/**
+ * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid
+ * for this resource and domain.
+ * @r: resource that the counter should be read from.
+ * @d: domain that the counter should be read from.
+ * @rmid: rmid of the counter to read.
+ * @eventid: eventid to read, e.g. L3 occupancy.
+ * @val: result of the counter read in bytes.
+ *
+ * Call from process context on a CPU that belongs to domain @d.
+ *
+ * Return:
+ * 0 on success, or -EIO, -EINVAL etc on error.
+ */
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+ u32 rmid, enum resctrl_event_id eventid, u64 *val);
+
+/**
+ * resctrl_arch_reset_rmid() - Reset any private state associated with rmid
+ * and eventid.
+ * @r: The domain's resource.
+ * @d: The rmid's domain.
+ * @rmid: The rmid whose counter values should be reset.
+ * @eventid: The eventid whose counter values should be reset.
+ *
+ * This can be called from any CPU.
+ */
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+ u32 rmid, enum resctrl_event_id eventid);
+
+extern unsigned int resctrl_rmid_realloc_threshold;
+extern unsigned int resctrl_rmid_realloc_limit;
#endif /* _RESCTRL_H */
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 68dab3e08aad..5b5357c0bd8c 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -323,29 +323,36 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* When we write to a bucket without unlocking, we use rht_assign_locked().
*/
-static inline void rht_lock(struct bucket_table *tbl,
- struct rhash_lock_head __rcu **bkt)
+static inline unsigned long rht_lock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt)
{
- local_bh_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
bit_spin_lock(0, (unsigned long *)bkt);
lock_map_acquire(&tbl->dep_map);
+ return flags;
}
-static inline void rht_lock_nested(struct bucket_table *tbl,
- struct rhash_lock_head __rcu **bucket,
- unsigned int subclass)
+static inline unsigned long rht_lock_nested(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bucket,
+ unsigned int subclass)
{
- local_bh_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
bit_spin_lock(0, (unsigned long *)bucket);
lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
+ return flags;
}
static inline void rht_unlock(struct bucket_table *tbl,
- struct rhash_lock_head __rcu **bkt)
+ struct rhash_lock_head __rcu **bkt,
+ unsigned long flags)
{
lock_map_release(&tbl->dep_map);
bit_spin_unlock(0, (unsigned long *)bkt);
- local_bh_enable();
+ local_irq_restore(flags);
}
static inline struct rhash_head *__rht_ptr(
@@ -393,7 +400,8 @@ static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
static inline void rht_assign_unlock(struct bucket_table *tbl,
struct rhash_lock_head __rcu **bkt,
- struct rhash_head *obj)
+ struct rhash_head *obj,
+ unsigned long flags)
{
if (rht_is_a_nulls(obj))
obj = NULL;
@@ -401,7 +409,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl,
rcu_assign_pointer(*bkt, (void *)obj);
preempt_enable();
__release(bitlock);
- local_bh_enable();
+ local_irq_restore(flags);
}
/**
@@ -706,6 +714,7 @@ static inline void *__rhashtable_insert_fast(
struct rhash_head __rcu **pprev;
struct bucket_table *tbl;
struct rhash_head *head;
+ unsigned long flags;
unsigned int hash;
int elasticity;
void *data;
@@ -720,11 +729,11 @@ static inline void *__rhashtable_insert_fast(
if (!bkt)
goto out;
pprev = NULL;
- rht_lock(tbl, bkt);
+ flags = rht_lock(tbl, bkt);
if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
slow_path:
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
rcu_read_unlock();
return rhashtable_insert_slow(ht, key, obj);
}
@@ -756,9 +765,9 @@ slow_path:
RCU_INIT_POINTER(list->rhead.next, head);
if (pprev) {
rcu_assign_pointer(*pprev, obj);
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
} else
- rht_assign_unlock(tbl, bkt, obj);
+ rht_assign_unlock(tbl, bkt, obj, flags);
data = NULL;
goto out;
}
@@ -785,7 +794,7 @@ slow_path:
}
atomic_inc(&ht->nelems);
- rht_assign_unlock(tbl, bkt, obj);
+ rht_assign_unlock(tbl, bkt, obj, flags);
if (rht_grow_above_75(ht, tbl))
schedule_work(&ht->run_work);
@@ -797,7 +806,7 @@ out:
return data;
out_unlock:
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
goto out;
}
@@ -991,6 +1000,7 @@ static inline int __rhashtable_remove_fast_one(
struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
+ unsigned long flags;
unsigned int hash;
int err = -ENOENT;
@@ -999,7 +1009,7 @@ static inline int __rhashtable_remove_fast_one(
if (!bkt)
return -ENOENT;
pprev = NULL;
- rht_lock(tbl, bkt);
+ flags = rht_lock(tbl, bkt);
rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
@@ -1043,14 +1053,14 @@ static inline int __rhashtable_remove_fast_one(
if (pprev) {
rcu_assign_pointer(*pprev, obj);
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
} else {
- rht_assign_unlock(tbl, bkt, obj);
+ rht_assign_unlock(tbl, bkt, obj, flags);
}
goto unlocked;
}
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
unlocked:
if (err > 0) {
atomic_dec(&ht->nelems);
@@ -1143,6 +1153,7 @@ static inline int __rhashtable_replace_fast(
struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
+ unsigned long flags;
unsigned int hash;
int err = -ENOENT;
@@ -1158,7 +1169,7 @@ static inline int __rhashtable_replace_fast(
return -ENOENT;
pprev = NULL;
- rht_lock(tbl, bkt);
+ flags = rht_lock(tbl, bkt);
rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (he != obj_old) {
@@ -1169,15 +1180,15 @@ static inline int __rhashtable_replace_fast(
rcu_assign_pointer(obj_new->next, obj_old->next);
if (pprev) {
rcu_assign_pointer(*pprev, obj_new);
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
} else {
- rht_assign_unlock(tbl, bkt, obj_new);
+ rht_assign_unlock(tbl, bkt, obj_new, flags);
}
err = 0;
goto unlocked;
}
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
unlocked:
return err;
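A minimal sketch of the new calling convention introduced above: rht_lock() now saves and returns the interrupt state, and the caller hands it back on the unlock side. The helper name is hypothetical.

static void example_bucket_replace(struct bucket_table *tbl,
				   struct rhash_lock_head __rcu **bkt,
				   struct rhash_head *obj)
{
	unsigned long flags;

	flags = rht_lock(tbl, bkt);
	/* ... modify the chain while holding the bucket lock ... */
	rht_assign_unlock(tbl, bkt, obj, flags);
}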
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index dac53fd3afea..3c7d295746f6 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -100,8 +100,8 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
- struct file *filp, poll_table *poll_table);
-
+ struct file *filp, poll_table *poll_table, int full);
+void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
#define RING_BUFFER_ALL_CPUS -1
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf80adca980b..bd3504d11b15 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -41,12 +41,15 @@ struct anon_vma {
atomic_t refcount;
/*
- * Count of child anon_vmas and VMAs which points to this anon_vma.
+	 * Count of child anon_vmas. Equal to the count of all anon_vmas that
+ * have ->parent pointing to this one, including itself.
*
* This counter is used for making decision about reusing anon_vma
* instead of forking new one. See comments in function anon_vma_clone.
*/
- unsigned degree;
+ unsigned long num_children;
+ /* Count of VMAs whose ->anon_vma pointer points to this object. */
+ unsigned long num_active_vmas;
struct anon_vma *parent; /* Parent of this anon_vma */
@@ -163,7 +166,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,
unlink_anon_vmas(next);
}
-struct anon_vma *page_get_anon_vma(struct page *page);
+struct anon_vma *folio_get_anon_vma(struct folio *folio);
/* RMAP flags, currently only relevant for some anon rmap operations. */
typedef int __bitwise rmap_t;
@@ -267,7 +270,7 @@ dup:
* @page: the exclusive anonymous page to try marking possibly shared
*
* The caller needs to hold the PT lock and has to have the page table entry
- * cleared/invalidated+flushed, to properly sync against GUP-fast.
+ * cleared/invalidated.
*
* This is similar to page_try_dup_anon_rmap(), however, not used during fork()
* to duplicate a mapping, but instead to prepare for KSM or temporarily
@@ -283,12 +286,68 @@ static inline int page_try_share_anon_rmap(struct page *page)
{
VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page);
- /* See page_try_dup_anon_rmap(). */
- if (likely(!is_device_private_page(page) &&
- unlikely(page_maybe_dma_pinned(page))))
- return -EBUSY;
+ /* device private pages cannot get pinned via GUP. */
+ if (unlikely(is_device_private_page(page))) {
+ ClearPageAnonExclusive(page);
+ return 0;
+ }
+
+ /*
+	 * We have to make sure that when we clear PageAnonExclusive,
+ * the page is not pinned and that concurrent GUP-fast won't succeed in
+ * concurrently pinning the page.
+ *
+ * Conceptually, PageAnonExclusive clearing consists of:
+ * (A1) Clear PTE
+ * (A2) Check if the page is pinned; back off if so.
+ * (A3) Clear PageAnonExclusive
+ * (A4) Restore PTE (optional, but certainly not writable)
+ *
+ * When clearing PageAnonExclusive, we cannot possibly map the page
+ * writable again, because anon pages that may be shared must never
+ * be writable. So in any case, if the PTE was writable it cannot
+ * be writable anymore afterwards and there would be a PTE change. Only
+ * if the PTE wasn't writable, there might not be a PTE change.
+ *
+ * Conceptually, GUP-fast pinning of an anon page consists of:
+ * (B1) Read the PTE
+ * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so.
+ * (B3) Pin the mapped page
+ * (B4) Check if the PTE changed by re-reading it; back off if so.
+ * (B5) If the original PTE is not writable, check if
+ * PageAnonExclusive is not set; back off if so.
+ *
+ * If the PTE was writable, we only have to make sure that GUP-fast
+ * observes a PTE change and properly backs off.
+ *
+ * If the PTE was not writable, we have to make sure that GUP-fast either
+ * detects a (temporary) PTE change or that PageAnonExclusive is cleared
+ * and properly backs off.
+ *
+ * Consequently, when clearing PageAnonExclusive(), we have to make
+ * sure that (A1), (A2)/(A3) and (A4) happen in the right memory
+ * order. In GUP-fast pinning code, we have to make sure that (B3),(B4)
+ * and (B5) happen in the right memory order.
+ *
+ * We assume that there might not be a memory barrier after
+ * clearing/invalidating the PTE (A1) and before restoring the PTE (A4),
+ * so we use explicit ones here.
+ */
+ /* Paired with the memory barrier in try_grab_folio(). */
+ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ smp_mb();
+
+ if (unlikely(page_maybe_dma_pinned(page)))
+ return -EBUSY;
ClearPageAnonExclusive(page);
+
+ /*
+ * This is conceptually a smp_wmb() paired with the smp_rmb() in
+ * gup_must_unshare().
+ */
+ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ smp_mb__after_atomic();
return 0;
}
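The GUP-fast side of this pairing (steps B3-B5) looks roughly like the following; this is a simplified sketch of the reader, not the actual gup.c code.

/* Simplified: decide whether an unpinned-looking anon page must be unshared. */
static bool example_gup_must_unshare(unsigned int flags, struct page *page)
{
	if (flags & FOLL_WRITE)
		return false;	/* (B2): a writable anon PTE implies exclusivity */

	/*
	 * (B3)/(B4) happened in the caller: the page was pinned and the PTE
	 * re-read. Pair with the smp_mb() in page_try_share_anon_rmap()
	 * before inspecting PageAnonExclusive (B5).
	 */
	smp_rmb();
	return !PageAnonExclusive(page);
}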
@@ -402,13 +461,8 @@ struct rmap_walk_control {
void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc);
void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc);
-
-/*
- * Called by memory-failure.c to kill processes.
- */
struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
struct rmap_walk_control *rwc);
-void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
#else /* !CONFIG_MMU */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index ae2c6a3cec5d..92ad75549e9c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -12,21 +12,22 @@
extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid);
extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
- u32 group, struct nlmsghdr *nlh, gfp_t flags);
+ u32 group, const struct nlmsghdr *nlh, gfp_t flags);
extern void rtnl_set_sk_err(struct net *net, u32 group, int error);
extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
u32 id, long expires, u32 error);
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags,
+ u32 portid, const struct nlmsghdr *nlh);
void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
gfp_t flags, int *new_nsid, int new_ifindex);
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned change, u32 event,
gfp_t flags, int *new_nsid,
- int new_ifindex);
+ int new_ifindex, u32 portid, u32 seq);
void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev,
- gfp_t flags);
+ gfp_t flags, u32 portid, const struct nlmsghdr *nlh);
/* RTNL is used as a global lock for all changes to network configuration */
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 8f416c5e929e..c0ef596f340b 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -1,7 +1,7 @@
#ifndef __LINUX_RWLOCK_H
#define __LINUX_RWLOCK_H
-#ifndef __LINUX_SPINLOCK_H
+#ifndef __LINUX_INSIDE_SPINLOCK_H
# error "please don't include this file directly"
#endif
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 8f5a86e210b9..d662cf136021 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -87,11 +87,6 @@ struct sbitmap {
*/
struct sbq_wait_state {
/**
- * @wait_cnt: Number of frees remaining before we wake up.
- */
- atomic_t wait_cnt;
-
- /**
* @wait: Wait queue.
*/
wait_queue_head_t wait;
@@ -138,6 +133,17 @@ struct sbitmap_queue {
* sbitmap_queue_get_shallow()
*/
unsigned int min_shallow_depth;
+
+ /**
+	 * @completion_cnt: Number of cleared bits passed to the
+	 * wakeup function.
+ */
+ atomic_t completion_cnt;
+
+ /**
+ * @wakeup_cnt: Number of thread wake ups issued.
+ */
+ atomic_t wakeup_cnt;
};
/**
@@ -575,8 +581,9 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
* sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue
* on a &struct sbitmap_queue.
* @sbq: Bitmap queue to wake up.
+ * @nr: Number of bits cleared.
*/
-void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr);
/**
* sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e7b2f8a5c711..853d08f7562b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -14,6 +14,7 @@
#include <linux/pid.h>
#include <linux/sem.h>
#include <linux/shm.h>
+#include <linux/kmsan_types.h>
#include <linux/mutex.h>
#include <linux/plist.h>
#include <linux/hrtimer.h>
@@ -81,25 +82,34 @@ struct task_group;
*/
/* Used in tsk->state: */
-#define TASK_RUNNING 0x0000
-#define TASK_INTERRUPTIBLE 0x0001
-#define TASK_UNINTERRUPTIBLE 0x0002
-#define __TASK_STOPPED 0x0004
-#define __TASK_TRACED 0x0008
+#define TASK_RUNNING 0x00000000
+#define TASK_INTERRUPTIBLE 0x00000001
+#define TASK_UNINTERRUPTIBLE 0x00000002
+#define __TASK_STOPPED 0x00000004
+#define __TASK_TRACED 0x00000008
/* Used in tsk->exit_state: */
-#define EXIT_DEAD 0x0010
-#define EXIT_ZOMBIE 0x0020
+#define EXIT_DEAD 0x00000010
+#define EXIT_ZOMBIE 0x00000020
#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
/* Used in tsk->state again: */
-#define TASK_PARKED 0x0040
-#define TASK_DEAD 0x0080
-#define TASK_WAKEKILL 0x0100
-#define TASK_WAKING 0x0200
-#define TASK_NOLOAD 0x0400
-#define TASK_NEW 0x0800
-/* RT specific auxilliary flag to mark RT lock waiters */
-#define TASK_RTLOCK_WAIT 0x1000
-#define TASK_STATE_MAX 0x2000
+#define TASK_PARKED 0x00000040
+#define TASK_DEAD 0x00000080
+#define TASK_WAKEKILL 0x00000100
+#define TASK_WAKING 0x00000200
+#define TASK_NOLOAD 0x00000400
+#define TASK_NEW 0x00000800
+#define TASK_RTLOCK_WAIT 0x00001000
+#define TASK_FREEZABLE 0x00002000
+#define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))
+#define TASK_FROZEN 0x00008000
+#define TASK_STATE_MAX 0x00010000
+
+#define TASK_ANY (TASK_STATE_MAX-1)
+
+/*
+ * DO NOT ADD ANY NEW USERS !
+ */
+#define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE)
/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -860,12 +870,6 @@ struct task_struct {
struct mm_struct *mm;
struct mm_struct *active_mm;
- /* Per-thread vma caching: */
- struct vmacache vmacache;
-
-#ifdef SPLIT_RSS_COUNTING
- struct task_rss_stat rss_stat;
-#endif
int exit_state;
int exit_code;
int exit_signal;
@@ -881,9 +885,6 @@ struct task_struct {
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
-#ifdef CONFIG_PSI
- unsigned sched_psi_wake_requeue:1;
-#endif
/* Force alignment to the next boundary: */
unsigned :0;
@@ -914,6 +915,10 @@ struct task_struct {
#ifdef CONFIG_MEMCG
unsigned in_user_fault:1;
#endif
+#ifdef CONFIG_LRU_GEN
+ /* whether the LRU algorithm may apply to this access */
+ unsigned in_lru_fault:1;
+#endif
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
@@ -936,7 +941,7 @@ struct task_struct {
#endif
#ifdef CONFIG_EVENTFD
/* Recursion prevention for eventfd_signal() */
- unsigned in_eventfd_signal:1;
+ unsigned in_eventfd:1;
#endif
#ifdef CONFIG_IOMMU_SVA
unsigned pasid_activated:1;
@@ -944,6 +949,10 @@ struct task_struct {
#ifdef CONFIG_CPU_SUP_INTEL
unsigned reported_split_lock:1;
#endif
+#ifdef CONFIG_TASK_DELAY_ACCT
+ /* delay due to memory thrashing */
+ unsigned in_thrashing:1;
+#endif
unsigned long atomic_flags; /* Flags requiring atomic access. */
@@ -1228,7 +1237,7 @@ struct task_struct {
unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
- struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
+ struct perf_event_context *perf_event_ctxp;
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif
@@ -1355,6 +1364,10 @@ struct task_struct {
#endif
#endif
+#ifdef CONFIG_KMSAN
+ struct kmsan_ctx kmsan_ctx;
+#endif
+
#if IS_ENABLED(CONFIG_KUNIT)
struct kunit *kunit_test;
#endif
@@ -1381,9 +1394,6 @@ struct task_struct {
#endif
#ifdef CONFIG_TRACING
- /* State flags for use by tracers: */
- unsigned long trace;
-
/* Bitmask and counter of trace recursion: */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
@@ -1713,8 +1723,9 @@ extern struct pid *cad_pid;
#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
+#define PF__HOLE__00004000 0x00004000
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
-#define PF_FROZEN 0x00010000 /* Frozen for system suspend */
+#define PF__HOLE__00010000 0x00010000
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */
@@ -1722,10 +1733,14 @@ extern struct pid *cad_pid;
* I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
+#define PF__HOLE__00800000 0x00800000
+#define PF__HOLE__01000000 0x01000000
+#define PF__HOLE__02000000 0x02000000
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */
-#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
+#define PF__HOLE__20000000 0x20000000
+#define PF__HOLE__40000000 0x40000000
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
/*
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 4d0a5be28b70..8270ad7ae14c 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -71,9 +71,8 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */
#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
-#define MMF_OOM_VICTIM 25 /* mm is the oom victim */
-#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */
-#define MMF_MULTIPROCESS 27 /* mm is shared between processes */
+#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */
+#define MMF_MULTIPROCESS 26 /* mm is shared between processes */
/*
* MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either
* replaced in the future by mm.pinned_vm when it becomes stable, or grow into
@@ -81,7 +80,7 @@ static inline int get_dumpable(struct mm_struct *mm)
* pinned pages were unpinned later on, we'll still keep this bit set for the
* lifecycle of this mm, just for simplicity.
*/
-#define MMF_HAS_PINNED 28 /* FOLL_PIN has run, never cleared */
+#define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */
#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index cafbe03eed01..20099268fa25 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -94,6 +94,7 @@ struct signal_struct {
refcount_t sigcnt;
atomic_t live;
int nr_threads;
+ int quick_threads;
struct list_head thread_head;
wait_queue_head_t wait_chldexit; /* for wait4() */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index e650946816d0..5a64582b086b 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -4,8 +4,6 @@
#include <linux/types.h>
-struct ctl_table;
-
#ifdef CONFIG_DETECT_HUNG_TASK
/* used for hung_task and block/ */
extern unsigned long sysctl_hung_task_timeout_secs;
@@ -31,7 +29,4 @@ extern int sysctl_numa_balancing_mode;
#define sysctl_numa_balancing_mode 0
#endif
-int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer,
- size_t *lenp, loff_t *ppos);
-
#endif /* _LINUX_SCHED_SYSCTL_H */
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 81cab4b01edc..d6c48163c6de 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -127,6 +127,9 @@ static inline void put_task_struct_many(struct task_struct *t, int nr)
void put_task_struct_rcu_user(struct task_struct *task);
+/* Free all architecture-specific resources held by a thread. */
+void release_thread(struct task_struct *dead_task);
+
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
extern int arch_task_struct_size __read_mostly;
#else
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index f054d0360a75..4cc52698e214 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -25,7 +25,7 @@ struct user_struct {
#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \
- defined(CONFIG_VFIO_PCI_ZDEV_KVM)
+ defined(CONFIG_VFIO_PCI_ZDEV_KVM) || IS_ENABLED(CONFIG_IOMMUFD)
atomic_long_t locked_vm;
#endif
#ifdef CONFIG_WATCH_QUEUE
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index a193884ecf2b..4f765bc788ff 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -84,7 +84,7 @@ struct scmi_protocol_handle;
struct scmi_clk_proto_ops {
int (*count_get)(const struct scmi_protocol_handle *ph);
- const struct scmi_clock_info *(*info_get)
+ const struct scmi_clock_info __must_check *(*info_get)
(const struct scmi_protocol_handle *ph, u32 clk_id);
int (*rate_get)(const struct scmi_protocol_handle *ph, u32 clk_id,
u64 *rate);
@@ -466,7 +466,7 @@ enum scmi_sensor_class {
*/
struct scmi_sensor_proto_ops {
int (*count_get)(const struct scmi_protocol_handle *ph);
- const struct scmi_sensor_info *(*info_get)
+ const struct scmi_sensor_info __must_check *(*info_get)
(const struct scmi_protocol_handle *ph, u32 sensor_id);
int (*trip_point_config)(const struct scmi_protocol_handle *ph,
u32 sensor_id, u8 trip_id, u64 trip_value);
diff --git a/include/linux/scs.h b/include/linux/scs.h
index 18122d9e17ff..4ab5bdc898cf 100644
--- a/include/linux/scs.h
+++ b/include/linux/scs.h
@@ -53,6 +53,22 @@ static inline bool task_scs_end_corrupted(struct task_struct *tsk)
return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
}
+DECLARE_STATIC_KEY_FALSE(dynamic_scs_enabled);
+
+static inline bool scs_is_dynamic(void)
+{
+ if (!IS_ENABLED(CONFIG_DYNAMIC_SCS))
+ return false;
+ return static_branch_likely(&dynamic_scs_enabled);
+}
+
+static inline bool scs_is_enabled(void)
+{
+ if (!IS_ENABLED(CONFIG_DYNAMIC_SCS))
+ return true;
+ return scs_is_dynamic();
+}
+
#else /* CONFIG_SHADOW_CALL_STACK */
static inline void *scs_alloc(int node) { return NULL; }
@@ -62,6 +78,8 @@ static inline void scs_task_reset(struct task_struct *tsk) {}
static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
static inline void scs_release(struct task_struct *tsk) {}
static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; }
+static inline bool scs_is_enabled(void) { return false; }
+static inline bool scs_is_dynamic(void) { return false; }
#endif /* CONFIG_SHADOW_CALL_STACK */
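A minimal sketch of how a caller might use the new predicates: the shadow stack is only set up when SCS is actually in use, whether built in unconditionally or enabled dynamically. The function name is hypothetical.

static int example_prepare_task_scs(struct task_struct *tsk, int node)
{
	if (!scs_is_enabled())
		return 0;

	/* With CONFIG_DYNAMIC_SCS this only runs once the static key is set. */
	return scs_prepare(tsk, node);
}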
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index a86e852507b3..358dc08e0831 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -820,4 +820,9 @@ struct sctp_new_encap_port_hdr {
__be16 new_port;
};
+/* Round an int up to the next multiple of 4. */
+#define SCTP_PAD4(s) (((s)+3)&~3)
+/* Truncate to the previous multiple of 4. */
+#define SCTP_TRUNC4(s) ((s)&~3)
+
#endif /* __LINUX_SCTP_H__ */
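A quick illustration of the rounding behaviour of the two helpers, shown as compile-time checks (sketch only):

static_assert(SCTP_PAD4(5) == 8);	/* round up to the next multiple of 4 */
static_assert(SCTP_PAD4(8) == 8);	/* already-aligned values are unchanged */
static_assert(SCTP_TRUNC4(7) == 4);	/* truncate down to a multiple of 4 */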
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index d31d76be4982..175079552f68 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -27,6 +27,7 @@ struct seccomp_filter;
*
* @mode: indicates one of the valid values above for controlled
* system calls available to a process.
+ * @filter_count: number of seccomp filters
* @filter: must always point to a valid seccomp-filter or NULL as it is
* accessed without locking during system call entry.
*
diff --git a/include/linux/security.h b/include/linux/security.h
index 1bc362cb413f..5b67f208f7de 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -31,6 +31,7 @@
#include <linux/err.h>
#include <linux/string.h>
#include <linux/mm.h>
+#include <linux/sockptr.h>
struct linux_binprm;
struct cred;
@@ -114,6 +115,7 @@ enum lockdown_reason {
LOCKDOWN_IOPORT,
LOCKDOWN_MSR,
LOCKDOWN_ACPI_TABLES,
+ LOCKDOWN_DEVICE_TREE,
LOCKDOWN_PCMCIA_CIS,
LOCKDOWN_TIOCSSERIAL,
LOCKDOWN_MODULE_PARAMETERS,
@@ -122,6 +124,7 @@ enum lockdown_reason {
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
LOCKDOWN_DBG_WRITE_KERNEL,
+ LOCKDOWN_RTAS_ERROR_INJECTION,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
@@ -359,6 +362,13 @@ int security_inode_getattr(const struct path *path);
int security_inode_setxattr(struct user_namespace *mnt_userns,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
+int security_inode_set_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl);
+int security_inode_get_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name);
+int security_inode_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *acl_name);
void security_inode_post_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
int security_inode_getxattr(struct dentry *dentry, const char *name);
@@ -394,6 +404,7 @@ int security_file_send_sigiotask(struct task_struct *tsk,
struct fown_struct *fown, int sig);
int security_file_receive(struct file *file);
int security_file_open(struct file *file);
+int security_file_truncate(struct file *file);
int security_task_alloc(struct task_struct *task, unsigned long clone_flags);
void security_task_free(struct task_struct *task);
int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
@@ -437,6 +448,7 @@ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info,
int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5);
void security_task_to_inode(struct task_struct *p, struct inode *inode);
+int security_create_user_ns(const struct cred *cred);
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
int security_msg_msg_alloc(struct msg_msg *msg);
@@ -461,7 +473,7 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd);
int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops,
unsigned nsops, int alter);
void security_d_instantiate(struct dentry *dentry, struct inode *inode);
-int security_getprocattr(struct task_struct *p, const char *lsm, char *name,
+int security_getprocattr(struct task_struct *p, const char *lsm, const char *name,
char **value);
int security_setprocattr(const char *lsm, const char *name, void *value,
size_t size);
@@ -869,6 +881,28 @@ static inline int security_inode_setxattr(struct user_namespace *mnt_userns,
return cap_inode_setxattr(dentry, name, value, size, flags);
}
+static inline int security_inode_set_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name,
+ struct posix_acl *kacl)
+{
+ return 0;
+}
+
+static inline int security_inode_get_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ return 0;
+}
+
+static inline int security_inode_remove_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ const char *acl_name)
+{
+ return 0;
+}
+
static inline void security_inode_post_setxattr(struct dentry *dentry,
const char *name, const void *value, size_t size, int flags)
{ }
@@ -1011,6 +1045,11 @@ static inline int security_file_open(struct file *file)
return 0;
}
+static inline int security_file_truncate(struct file *file)
+{
+ return 0;
+}
+
static inline int security_task_alloc(struct task_struct *task,
unsigned long clone_flags)
{
@@ -1194,6 +1233,11 @@ static inline int security_task_prctl(int option, unsigned long arg2,
static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
{ }
+static inline int security_create_user_ns(const struct cred *cred)
+{
+ return 0;
+}
+
static inline int security_ipc_permission(struct kern_ipc_perm *ipcp,
short flag)
{
@@ -1301,7 +1345,7 @@ static inline void security_d_instantiate(struct dentry *dentry,
{ }
static inline int security_getprocattr(struct task_struct *p, const char *lsm,
- char *name, char **value)
+ const char *name, char **value)
{
return -EINVAL;
}
@@ -1403,8 +1447,8 @@ int security_socket_getsockopt(struct socket *sock, int level, int optname);
int security_socket_setsockopt(struct socket *sock, int level, int optname);
int security_socket_shutdown(struct socket *sock, int how);
int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb);
-int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
- int __user *optlen, unsigned len);
+int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
+ sockptr_t optlen, unsigned int len);
int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid);
int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
void security_sk_free(struct sock *sk);
@@ -1540,8 +1584,10 @@ static inline int security_sock_rcv_skb(struct sock *sk,
return 0;
}
-static inline int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
- int __user *optlen, unsigned len)
+static inline int security_socket_getpeersec_stream(struct socket *sock,
+ sockptr_t optval,
+ sockptr_t optlen,
+ unsigned int len)
{
return -ENOPROTOOPT;
}
@@ -2060,6 +2106,7 @@ static inline int security_perf_event_write(struct perf_event *event)
#ifdef CONFIG_SECURITY
extern int security_uring_override_creds(const struct cred *new);
extern int security_uring_sqpoll(void);
+extern int security_uring_cmd(struct io_uring_cmd *ioucmd);
#else
static inline int security_uring_override_creds(const struct cred *new)
{
@@ -2069,6 +2116,10 @@ static inline int security_uring_sqpoll(void)
{
return 0;
}
+static inline int security_uring_cmd(struct io_uring_cmd *ioucmd)
+{
+ return 0;
+}
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_IO_URING */
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 1ac0d712a9c3..31ac562a17d7 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -11,7 +11,8 @@
#define LINUX_OPAL_H
#include <uapi/linux/sed-opal.h>
-#include <linux/kernel.h>
+#include <linux/compiler_types.h>
+#include <linux/types.h>
struct opal_dev;
@@ -43,6 +44,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
case IOC_OPAL_MBR_DONE:
case IOC_OPAL_WRITE_SHADOW_MBR:
case IOC_OPAL_GENERIC_TABLE_RW:
+ case IOC_OPAL_GET_STATUS:
return true;
}
return false;
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index 3368c261ab62..66f624fc618c 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/device.h>
+#include <linux/uaccess.h>
#include <linux/termios.h>
#include <linux/delay.h>
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 8c7b793aa4d7..19376bee9667 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -32,7 +32,7 @@ struct plat_serial8250_port {
void (*serial_out)(struct uart_port *, int, int);
void (*set_termios)(struct uart_port *,
struct ktermios *new,
- struct ktermios *old);
+ const struct ktermios *old);
void (*set_ldisc)(struct uart_port *,
struct ktermios *);
unsigned int (*get_mctrl)(struct uart_port *);
@@ -74,6 +74,7 @@ struct uart_8250_port;
struct uart_8250_ops {
int (*setup_irq)(struct uart_8250_port *);
void (*release_irq)(struct uart_8250_port *);
+ void (*setup_timer)(struct uart_8250_port *);
};
struct uart_8250_em485 {
@@ -157,7 +158,7 @@ extern int early_serial8250_setup(struct earlycon_device *device,
extern void serial8250_update_uartclk(struct uart_port *port,
unsigned int uartclk);
extern void serial8250_do_set_termios(struct uart_port *port,
- struct ktermios *termios, struct ktermios *old);
+ struct ktermios *termios, const struct ktermios *old);
extern void serial8250_do_set_ldisc(struct uart_port *port,
struct ktermios *termios);
extern unsigned int serial8250_do_get_mctrl(struct uart_port *port);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index aef3145f2032..91871464b99d 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -141,6 +141,14 @@ struct gpio_desc;
* Locking: none.
* Interrupts: caller dependent.
*
+ * @start_rx: ``void ()(struct uart_port *port)``
+ *
+ * Start receiving characters.
+ *
+ * Locking: @port->lock taken.
+ * Interrupts: locally disabled.
+ * This call must not sleep.
+ *
* @stop_rx: ``void ()(struct uart_port *port)``
*
* Stop receiving characters; the @port is in the process of being closed.
@@ -379,7 +387,7 @@ struct uart_ops {
void (*shutdown)(struct uart_port *);
void (*flush_buffer)(struct uart_port *);
void (*set_termios)(struct uart_port *, struct ktermios *new,
- struct ktermios *old);
+ const struct ktermios *old);
void (*set_ldisc)(struct uart_port *, struct ktermios *);
void (*pm)(struct uart_port *, unsigned int state,
unsigned int oldstate);
@@ -414,7 +422,7 @@ struct uart_icount {
__u32 buf_overrun;
};
-typedef unsigned int __bitwise upf_t;
+typedef u64 __bitwise upf_t;
typedef unsigned int __bitwise upstat_t;
struct uart_port {
@@ -425,7 +433,7 @@ struct uart_port {
void (*serial_out)(struct uart_port *, int, int);
void (*set_termios)(struct uart_port *,
struct ktermios *new,
- struct ktermios *old);
+ const struct ktermios *old);
void (*set_ldisc)(struct uart_port *,
struct ktermios *);
unsigned int (*get_mctrl)(struct uart_port *);
@@ -505,23 +513,24 @@ struct uart_port {
#define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ )
#define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ )
-#define UPF_NO_THRE_TEST ((__force upf_t) (1 << 19))
+#define UPF_NO_THRE_TEST ((__force upf_t) BIT_ULL(19))
/* Port has hardware-assisted h/w flow control */
-#define UPF_AUTO_CTS ((__force upf_t) (1 << 20))
-#define UPF_AUTO_RTS ((__force upf_t) (1 << 21))
+#define UPF_AUTO_CTS ((__force upf_t) BIT_ULL(20))
+#define UPF_AUTO_RTS ((__force upf_t) BIT_ULL(21))
#define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS))
/* Port has hardware-assisted s/w flow control */
-#define UPF_SOFT_FLOW ((__force upf_t) (1 << 22))
-#define UPF_CONS_FLOW ((__force upf_t) (1 << 23))
-#define UPF_SHARE_IRQ ((__force upf_t) (1 << 24))
-#define UPF_EXAR_EFR ((__force upf_t) (1 << 25))
-#define UPF_BUG_THRE ((__force upf_t) (1 << 26))
+#define UPF_SOFT_FLOW ((__force upf_t) BIT_ULL(22))
+#define UPF_CONS_FLOW ((__force upf_t) BIT_ULL(23))
+#define UPF_SHARE_IRQ ((__force upf_t) BIT_ULL(24))
+#define UPF_EXAR_EFR ((__force upf_t) BIT_ULL(25))
+#define UPF_BUG_THRE ((__force upf_t) BIT_ULL(26))
/* The exact UART type is known and should not be probed. */
-#define UPF_FIXED_TYPE ((__force upf_t) (1 << 27))
-#define UPF_BOOT_AUTOCONF ((__force upf_t) (1 << 28))
-#define UPF_FIXED_PORT ((__force upf_t) (1 << 29))
-#define UPF_DEAD ((__force upf_t) (1 << 30))
-#define UPF_IOREMAP ((__force upf_t) (1 << 31))
+#define UPF_FIXED_TYPE ((__force upf_t) BIT_ULL(27))
+#define UPF_BOOT_AUTOCONF ((__force upf_t) BIT_ULL(28))
+#define UPF_FIXED_PORT ((__force upf_t) BIT_ULL(29))
+#define UPF_DEAD ((__force upf_t) BIT_ULL(30))
+#define UPF_IOREMAP ((__force upf_t) BIT_ULL(31))
+#define UPF_FULL_PROBE ((__force upf_t) BIT_ULL(32))
#define __UPF_CHANGE_MASK 0x17fff
#define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK)
@@ -616,6 +625,23 @@ struct uart_state {
/* number of characters left in xmit buffer before we ask for more */
#define WAKEUP_CHARS 256
+/**
+ * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars
+ * @up: uart_port structure describing the port
+ * @chars: number of characters sent
+ *
+ * This function advances the tail of circular xmit buffer by the number of
+ * @chars transmitted and handles accounting of transmitted bytes (into
+ * @up's icount.tx).
+ */
+static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars)
+{
+ struct circ_buf *xmit = &up->state->xmit;
+
+ xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1);
+ up->icount.tx += chars;
+}
+
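An illustrative transmit loop using the new helper; the FIFO register offset is a placeholder, not a real device definition.

static void example_tx_chars(struct uart_port *port, unsigned int count)
{
	struct circ_buf *xmit = &port->state->xmit;

	while (count-- && !uart_circ_empty(xmit)) {
		/* 0x0 stands in for the device's TX holding register. */
		writeb(xmit->buf[xmit->tail], port->membase + 0x0);
		uart_xmit_advance(port, 1);	/* moves tail, bumps icount.tx */
	}
}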
struct module;
struct tty_driver;
@@ -644,7 +670,7 @@ void uart_write_wakeup(struct uart_port *port);
void uart_update_timeout(struct uart_port *port, unsigned int cflag,
unsigned int baud);
unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios,
- struct ktermios *old, unsigned int min,
+ const struct ktermios *old, unsigned int min,
unsigned int max);
unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud);
@@ -717,9 +743,15 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED;
static inline int setup_earlycon(char *buf) { return 0; }
#endif
-static inline bool uart_console_enabled(struct uart_port *port)
+/* Variant of uart_console_registered() when the console_list_lock is held. */
+static inline bool uart_console_registered_locked(struct uart_port *port)
+{
+ return uart_console(port) && console_is_registered_locked(port->cons);
+}
+
+static inline bool uart_console_registered(struct uart_port *port)
{
- return uart_console(port) && (port->cons->flags & CON_ENABLED);
+ return uart_console(port) && console_is_registered(port->cons);
}
struct uart_port *uart_get_console(struct uart_port *ports, int nr,
@@ -925,5 +957,4 @@ static inline int uart_handle_break(struct uart_port *port)
!((cflag) & CLOCAL))
int uart_get_rs485_mode(struct uart_port *port);
-int uart_rs485_config(struct uart_port *port);
#endif /* LINUX_SERIAL_CORE_H */
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index 302094b855fb..52b98f9666a2 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -332,39 +332,33 @@ enum {
/* SFP EEPROM registers */
enum {
- SFP_PHYS_ID = 0x00,
- SFP_PHYS_EXT_ID = 0x01,
- SFP_CONNECTOR = 0x02,
- SFP_COMPLIANCE = 0x03,
- SFP_ENCODING = 0x0b,
- SFP_BR_NOMINAL = 0x0c,
- SFP_RATE_ID = 0x0d,
- SFP_LINK_LEN_SM_KM = 0x0e,
- SFP_LINK_LEN_SM_100M = 0x0f,
- SFP_LINK_LEN_50UM_OM2_10M = 0x10,
- SFP_LINK_LEN_62_5UM_OM1_10M = 0x11,
- SFP_LINK_LEN_COPPER_1M = 0x12,
- SFP_LINK_LEN_50UM_OM4_10M = 0x12,
- SFP_LINK_LEN_50UM_OM3_10M = 0x13,
- SFP_VENDOR_NAME = 0x14,
- SFP_VENDOR_OUI = 0x25,
- SFP_VENDOR_PN = 0x28,
- SFP_VENDOR_REV = 0x38,
- SFP_OPTICAL_WAVELENGTH_MSB = 0x3c,
- SFP_OPTICAL_WAVELENGTH_LSB = 0x3d,
- SFP_CABLE_SPEC = 0x3c,
- SFP_CC_BASE = 0x3f,
- SFP_OPTIONS = 0x40, /* 2 bytes, MSB, LSB */
- SFP_BR_MAX = 0x42,
- SFP_BR_MIN = 0x43,
- SFP_VENDOR_SN = 0x44,
- SFP_DATECODE = 0x54,
- SFP_DIAGMON = 0x5c,
- SFP_ENHOPTS = 0x5d,
- SFP_SFF8472_COMPLIANCE = 0x5e,
- SFP_CC_EXT = 0x5f,
+ SFP_PHYS_ID = 0,
+ SFP_PHYS_EXT_ID = 1,
SFP_PHYS_EXT_ID_SFP = 0x04,
+
+ SFP_CONNECTOR = 2,
+ SFP_COMPLIANCE = 3,
+ SFP_ENCODING = 11,
+ SFP_BR_NOMINAL = 12,
+ SFP_RATE_ID = 13,
+ SFP_LINK_LEN_SM_KM = 14,
+ SFP_LINK_LEN_SM_100M = 15,
+ SFP_LINK_LEN_50UM_OM2_10M = 16,
+ SFP_LINK_LEN_62_5UM_OM1_10M = 17,
+ SFP_LINK_LEN_COPPER_1M = 18,
+ SFP_LINK_LEN_50UM_OM4_10M = 18,
+ SFP_LINK_LEN_50UM_OM3_10M = 19,
+ SFP_VENDOR_NAME = 20,
+ SFP_VENDOR_OUI = 37,
+ SFP_VENDOR_PN = 40,
+ SFP_VENDOR_REV = 56,
+ SFP_OPTICAL_WAVELENGTH_MSB = 60,
+ SFP_OPTICAL_WAVELENGTH_LSB = 61,
+ SFP_CABLE_SPEC = 60,
+ SFP_CC_BASE = 63,
+
+ SFP_OPTIONS = 64, /* 2 bytes, MSB, LSB */
SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13),
SFP_OPTIONS_PAGING_A2 = BIT(12),
SFP_OPTIONS_RETIMER = BIT(11),
@@ -378,11 +372,20 @@ enum {
SFP_OPTIONS_TX_FAULT = BIT(3),
SFP_OPTIONS_LOS_INVERTED = BIT(2),
SFP_OPTIONS_LOS_NORMAL = BIT(1),
+
+ SFP_BR_MAX = 66,
+ SFP_BR_MIN = 67,
+ SFP_VENDOR_SN = 68,
+ SFP_DATECODE = 84,
+
+ SFP_DIAGMON = 92,
SFP_DIAGMON_DDM = BIT(6),
SFP_DIAGMON_INT_CAL = BIT(5),
SFP_DIAGMON_EXT_CAL = BIT(4),
SFP_DIAGMON_RXPWR_AVG = BIT(3),
SFP_DIAGMON_ADDRMODE = BIT(2),
+
+ SFP_ENHOPTS = 93,
SFP_ENHOPTS_ALARMWARN = BIT(7),
SFP_ENHOPTS_SOFT_TX_DISABLE = BIT(6),
SFP_ENHOPTS_SOFT_TX_FAULT = BIT(5),
@@ -390,6 +393,8 @@ enum {
SFP_ENHOPTS_SOFT_RATE_SELECT = BIT(3),
SFP_ENHOPTS_APP_SELECT_SFF8079 = BIT(2),
SFP_ENHOPTS_SOFT_RATE_SFF8431 = BIT(1),
+
+ SFP_SFF8472_COMPLIANCE = 94,
SFP_SFF8472_COMPLIANCE_NONE = 0x00,
SFP_SFF8472_COMPLIANCE_REV9_3 = 0x01,
SFP_SFF8472_COMPLIANCE_REV9_5 = 0x02,
@@ -399,68 +404,70 @@ enum {
SFP_SFF8472_COMPLIANCE_REV11_3 = 0x06,
SFP_SFF8472_COMPLIANCE_REV11_4 = 0x07,
SFP_SFF8472_COMPLIANCE_REV12_0 = 0x08,
+
+ SFP_CC_EXT = 95,
};
/* SFP Diagnostics */
enum {
/* Alarm and warnings stored MSB at lower address then LSB */
- SFP_TEMP_HIGH_ALARM = 0x00,
- SFP_TEMP_LOW_ALARM = 0x02,
- SFP_TEMP_HIGH_WARN = 0x04,
- SFP_TEMP_LOW_WARN = 0x06,
- SFP_VOLT_HIGH_ALARM = 0x08,
- SFP_VOLT_LOW_ALARM = 0x0a,
- SFP_VOLT_HIGH_WARN = 0x0c,
- SFP_VOLT_LOW_WARN = 0x0e,
- SFP_BIAS_HIGH_ALARM = 0x10,
- SFP_BIAS_LOW_ALARM = 0x12,
- SFP_BIAS_HIGH_WARN = 0x14,
- SFP_BIAS_LOW_WARN = 0x16,
- SFP_TXPWR_HIGH_ALARM = 0x18,
- SFP_TXPWR_LOW_ALARM = 0x1a,
- SFP_TXPWR_HIGH_WARN = 0x1c,
- SFP_TXPWR_LOW_WARN = 0x1e,
- SFP_RXPWR_HIGH_ALARM = 0x20,
- SFP_RXPWR_LOW_ALARM = 0x22,
- SFP_RXPWR_HIGH_WARN = 0x24,
- SFP_RXPWR_LOW_WARN = 0x26,
- SFP_LASER_TEMP_HIGH_ALARM = 0x28,
- SFP_LASER_TEMP_LOW_ALARM = 0x2a,
- SFP_LASER_TEMP_HIGH_WARN = 0x2c,
- SFP_LASER_TEMP_LOW_WARN = 0x2e,
- SFP_TEC_CUR_HIGH_ALARM = 0x30,
- SFP_TEC_CUR_LOW_ALARM = 0x32,
- SFP_TEC_CUR_HIGH_WARN = 0x34,
- SFP_TEC_CUR_LOW_WARN = 0x36,
- SFP_CAL_RXPWR4 = 0x38,
- SFP_CAL_RXPWR3 = 0x3c,
- SFP_CAL_RXPWR2 = 0x40,
- SFP_CAL_RXPWR1 = 0x44,
- SFP_CAL_RXPWR0 = 0x48,
- SFP_CAL_TXI_SLOPE = 0x4c,
- SFP_CAL_TXI_OFFSET = 0x4e,
- SFP_CAL_TXPWR_SLOPE = 0x50,
- SFP_CAL_TXPWR_OFFSET = 0x52,
- SFP_CAL_T_SLOPE = 0x54,
- SFP_CAL_T_OFFSET = 0x56,
- SFP_CAL_V_SLOPE = 0x58,
- SFP_CAL_V_OFFSET = 0x5a,
- SFP_CHKSUM = 0x5f,
-
- SFP_TEMP = 0x60,
- SFP_VCC = 0x62,
- SFP_TX_BIAS = 0x64,
- SFP_TX_POWER = 0x66,
- SFP_RX_POWER = 0x68,
- SFP_LASER_TEMP = 0x6a,
- SFP_TEC_CUR = 0x6c,
-
- SFP_STATUS = 0x6e,
+ SFP_TEMP_HIGH_ALARM = 0,
+ SFP_TEMP_LOW_ALARM = 2,
+ SFP_TEMP_HIGH_WARN = 4,
+ SFP_TEMP_LOW_WARN = 6,
+ SFP_VOLT_HIGH_ALARM = 8,
+ SFP_VOLT_LOW_ALARM = 10,
+ SFP_VOLT_HIGH_WARN = 12,
+ SFP_VOLT_LOW_WARN = 14,
+ SFP_BIAS_HIGH_ALARM = 16,
+ SFP_BIAS_LOW_ALARM = 18,
+ SFP_BIAS_HIGH_WARN = 20,
+ SFP_BIAS_LOW_WARN = 22,
+ SFP_TXPWR_HIGH_ALARM = 24,
+ SFP_TXPWR_LOW_ALARM = 26,
+ SFP_TXPWR_HIGH_WARN = 28,
+ SFP_TXPWR_LOW_WARN = 30,
+ SFP_RXPWR_HIGH_ALARM = 32,
+ SFP_RXPWR_LOW_ALARM = 34,
+ SFP_RXPWR_HIGH_WARN = 36,
+ SFP_RXPWR_LOW_WARN = 38,
+ SFP_LASER_TEMP_HIGH_ALARM = 40,
+ SFP_LASER_TEMP_LOW_ALARM = 42,
+ SFP_LASER_TEMP_HIGH_WARN = 44,
+ SFP_LASER_TEMP_LOW_WARN = 46,
+ SFP_TEC_CUR_HIGH_ALARM = 48,
+ SFP_TEC_CUR_LOW_ALARM = 50,
+ SFP_TEC_CUR_HIGH_WARN = 52,
+ SFP_TEC_CUR_LOW_WARN = 54,
+ SFP_CAL_RXPWR4 = 56,
+ SFP_CAL_RXPWR3 = 60,
+ SFP_CAL_RXPWR2 = 64,
+ SFP_CAL_RXPWR1 = 68,
+ SFP_CAL_RXPWR0 = 72,
+ SFP_CAL_TXI_SLOPE = 76,
+ SFP_CAL_TXI_OFFSET = 78,
+ SFP_CAL_TXPWR_SLOPE = 80,
+ SFP_CAL_TXPWR_OFFSET = 82,
+ SFP_CAL_T_SLOPE = 84,
+ SFP_CAL_T_OFFSET = 86,
+ SFP_CAL_V_SLOPE = 88,
+ SFP_CAL_V_OFFSET = 90,
+ SFP_CHKSUM = 95,
+
+ SFP_TEMP = 96,
+ SFP_VCC = 98,
+ SFP_TX_BIAS = 100,
+ SFP_TX_POWER = 102,
+ SFP_RX_POWER = 104,
+ SFP_LASER_TEMP = 106,
+ SFP_TEC_CUR = 108,
+
+ SFP_STATUS = 110,
SFP_STATUS_TX_DISABLE = BIT(7),
SFP_STATUS_TX_DISABLE_FORCE = BIT(6),
SFP_STATUS_TX_FAULT = BIT(2),
SFP_STATUS_RX_LOS = BIT(1),
- SFP_ALARM0 = 0x70,
+ SFP_ALARM0 = 112,
SFP_ALARM0_TEMP_HIGH = BIT(7),
SFP_ALARM0_TEMP_LOW = BIT(6),
SFP_ALARM0_VCC_HIGH = BIT(5),
@@ -470,11 +477,11 @@ enum {
SFP_ALARM0_TXPWR_HIGH = BIT(1),
SFP_ALARM0_TXPWR_LOW = BIT(0),
- SFP_ALARM1 = 0x71,
+ SFP_ALARM1 = 113,
SFP_ALARM1_RXPWR_HIGH = BIT(7),
SFP_ALARM1_RXPWR_LOW = BIT(6),
- SFP_WARN0 = 0x74,
+ SFP_WARN0 = 116,
SFP_WARN0_TEMP_HIGH = BIT(7),
SFP_WARN0_TEMP_LOW = BIT(6),
SFP_WARN0_VCC_HIGH = BIT(5),
@@ -484,13 +491,15 @@ enum {
SFP_WARN0_TXPWR_HIGH = BIT(1),
SFP_WARN0_TXPWR_LOW = BIT(0),
- SFP_WARN1 = 0x75,
+ SFP_WARN1 = 117,
SFP_WARN1_RXPWR_HIGH = BIT(7),
SFP_WARN1_RXPWR_LOW = BIT(6),
- SFP_EXT_STATUS = 0x76,
- SFP_VSL = 0x78,
- SFP_PAGE = 0x7f,
+ SFP_EXT_STATUS = 118,
+ SFP_EXT_STATUS_PWRLVL_SELECT = BIT(0),
+
+ SFP_VSL = 120,
+ SFP_PAGE = 127,
};
struct fwnode_handle;
@@ -535,7 +544,7 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id);
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
- unsigned long *support);
+ unsigned long *support, unsigned long *interfaces);
phy_interface_t sfp_select_interface(struct sfp_bus *bus,
unsigned long *link_modes);
@@ -568,7 +577,8 @@ static inline bool sfp_may_have_phy(struct sfp_bus *bus,
static inline void sfp_parse_support(struct sfp_bus *bus,
const struct sfp_eeprom_id *id,
- unsigned long *support)
+ unsigned long *support,
+ unsigned long *interfaces)
{
}
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 1b6c4013f691..d500ea967dc7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -29,15 +29,10 @@ struct shmem_inode_info {
struct inode vfs_inode;
};
-#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE
-#define SHMEM_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE
-#define SHMEM_FL_INHERITED FS_FL_USER_MODIFIABLE
-
-/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define SHMEM_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
-
-/* Flags that are appropriate for non-directories/regular files. */
-#define SHMEM_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
+#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE
+#define SHMEM_FL_USER_MODIFIABLE \
+ (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
+#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL)
struct shmem_sb_info {
unsigned long max_blocks; /* How many blocks are allowed */
@@ -97,17 +92,19 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
int shmem_unuse(unsigned int type);
-extern bool shmem_is_huge(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index);
-static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
+extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode,
+ pgoff_t index, bool shmem_huge_force);
+static inline bool shmem_huge_enabled(struct vm_area_struct *vma,
+ bool shmem_huge_force)
{
- return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff);
+ return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff,
+ shmem_huge_force);
}
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
pgoff_t start, pgoff_t end);
-/* Flag allocation requirements to shmem_getpage */
+/* Flag allocation requirements to shmem_get_folio */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_NOALLOC, /* similar, but fail on hole or use fallocated page */
@@ -116,8 +113,8 @@ enum sgp_type {
SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
};
-extern int shmem_getpage(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp);
+int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
+ enum sgp_type sgp);
static inline struct page *shmem_read_mapping_page(
struct address_space *mapping, pgoff_t index)
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 08e6054e061f..71310efe2fab 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -2,6 +2,9 @@
#ifndef _LINUX_SHRINKER_H
#define _LINUX_SHRINKER_H
+#include <linux/atomic.h>
+#include <linux/types.h>
+
/*
* This struct is used to pass information from page reclaim to the shrinkers.
* We consolidate the values for easier extension later.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca8afa382bf2..4c8492401a10 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -533,6 +533,13 @@ enum {
struct ubuf_info {
void (*callback)(struct sk_buff *, struct ubuf_info *,
bool zerocopy_success);
+ refcount_t refcnt;
+ u8 flags;
+};
+
+struct ubuf_info_msgzc {
+ struct ubuf_info ubuf;
+
union {
struct {
unsigned long desc;
@@ -545,8 +552,6 @@ struct ubuf_info {
u32 bytelen;
};
};
- refcount_t refcnt;
- u8 flags;
struct mmpin {
struct user_struct *user;
@@ -555,6 +560,8 @@ struct ubuf_info {
};
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+#define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \
+ ubuf)
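An illustrative downcast from the generic ubuf_info to the MSG_ZEROCOPY variant, mirroring how uarg_to_msgzc() is meant to be used; the function is hypothetical.

static void example_dump_zerocopy_range(struct ubuf_info *uarg)
{
	struct ubuf_info_msgzc *msgzc = uarg_to_msgzc(uarg);

	pr_debug("zerocopy notification id %u, %u bytes\n",
		 msgzc->id, msgzc->bytelen);
}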
int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
void mm_unaccount_pinned_pages(struct mmpin *mmp);
@@ -796,6 +803,7 @@ typedef unsigned char *sk_buff_data_t;
* @csum_level: indicates the number of consecutive checksums found in
* the packet minus one that have been verified as
* CHECKSUM_UNNECESSARY (max 3)
+ * @scm_io_uring: SKB holds io_uring registered files
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
* @slow_gro: state present at GRO time, slower prepare step required
@@ -810,7 +818,7 @@ typedef unsigned char *sk_buff_data_t;
* @mark: Generic packet mark
* @reserved_tailroom: (aka @mark) number of bytes of free space available
* at the tail of an sk_buff
- * @vlan_present: VLAN tag is present
+ * @vlan_all: vlan fields (proto & tci)
* @vlan_proto: vlan encapsulation protocol
* @vlan_tci: vlan tag control information
* @inner_protocol: Protocol (encapsulation)
@@ -943,7 +951,7 @@ struct sk_buff {
/* private: */
__u8 __pkt_vlan_present_offset[0];
/* public: */
- __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */
+ __u8 remcsum_offload:1;
__u8 csum_complete_sw:1;
__u8 csum_level:2;
__u8 dst_pending_confirm:1;
@@ -958,7 +966,6 @@ struct sk_buff {
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
- __u8 remcsum_offload:1;
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
__u8 offload_l3_fwd_mark:1;
@@ -975,6 +982,7 @@ struct sk_buff {
#endif
__u8 slow_gro:1;
__u8 csum_not_inet:1;
+ __u8 scm_io_uring:1;
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
@@ -990,8 +998,13 @@ struct sk_buff {
__u32 priority;
int skb_iif;
__u32 hash;
- __be16 vlan_proto;
- __u16 vlan_tci;
+ union {
+ u32 vlan_all;
+ struct {
+ __be16 vlan_proto;
+ __u16 vlan_tci;
+ };
+ };
#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
union {
unsigned int napi_id;
@@ -1050,15 +1063,13 @@ struct sk_buff {
#endif
#define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset)
-/* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time
+/* if you move tc_at_ingress or mono_delivery_time
* around, you also must adapt these constants.
*/
#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_VLAN_PRESENT_BIT 7
#define TC_AT_INGRESS_MASK (1 << 0)
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2)
#else
-#define PKT_VLAN_PRESENT_BIT 0
#define TC_AT_INGRESS_MASK (1 << 7)
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5)
#endif
@@ -1195,7 +1206,8 @@ static inline bool skb_unref(struct sk_buff *skb)
return true;
}
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+void __fix_address
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
/**
* kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
@@ -1243,6 +1255,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb,
void skb_attempt_defer_free(struct sk_buff *skb);
struct sk_buff *napi_build_skb(void *data, unsigned int frag_size);
+struct sk_buff *slab_build_skb(void *data);
/**
* alloc_skb - allocate a network buffer
@@ -1460,8 +1473,8 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
unsigned int key_count);
struct bpf_flow_dissector;
-bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
- __be16 proto, int nhoff, int hlen, unsigned int flags);
+u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+ __be16 proto, int nhoff, int hlen, unsigned int flags);
bool __skb_flow_dissect(const struct net *net,
const struct sk_buff *skb,
@@ -2444,6 +2457,27 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
skb_shinfo(skb)->nr_frags = i + 1;
}
+/**
+ * skb_fill_page_desc_noacc - initialise a paged fragment in an skb
+ * @skb: buffer containing fragment to be initialised
+ * @i: paged fragment index to initialise
+ * @page: the page to use for this fragment
+ * @off: the offset to the data within @page
+ * @size: the length of the data
+ *
+ * Variant of skb_fill_page_desc() which does not deal with
+ * pfmemalloc, if page is not owned by us.
+ */
+static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
+ struct page *page, int off,
+ int size)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ __skb_fill_page_desc_noacc(shinfo, i, page, off, size);
+ shinfo->nr_frags = i + 1;
+}
+
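An illustrative zerocopy-style append, roughly what a sender that does not own the page does; the helper and the bookkeeping shown are assumptions, not part of this header.

static void example_append_user_page(struct sk_buff *skb, struct page *page,
				     int off, int size)
{
	int i = skb_shinfo(skb)->nr_frags;

	/* No pfmemalloc propagation: the page belongs to userspace. */
	skb_fill_page_desc_noacc(skb, i, page, off, size);
	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
}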
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
int size, unsigned int truesize);
@@ -2587,20 +2621,6 @@ void *skb_pull_data(struct sk_buff *skb, size_t len);
void *__pskb_pull_tail(struct sk_buff *skb, int delta);
-static inline void *__pskb_pull(struct sk_buff *skb, unsigned int len)
-{
- if (len > skb_headlen(skb) &&
- !__pskb_pull_tail(skb, len - skb_headlen(skb)))
- return NULL;
- skb->len -= len;
- return skb->data += len;
-}
-
-static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
-{
- return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
-}
-
static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len)
{
if (likely(len <= skb_headlen(skb)))
@@ -2610,6 +2630,15 @@ static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len)
return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
}
+static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (!pskb_may_pull(skb, len))
+ return NULL;
+
+ skb->len -= len;
+ return skb->data += len;
+}
+
void skb_condense(struct sk_buff *skb);
/**
@@ -5024,12 +5053,5 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb)
}
#endif
-static inline bool skb_pp_recycle(struct sk_buff *skb, void *data)
-{
- if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
- return false;
- return page_pool_return_skb_page(virt_to_page(data));
-}
-
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 48f4b645193b..84f787416a54 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -82,6 +82,7 @@ struct sk_psock {
u32 apply_bytes;
u32 cork_bytes;
u32 eval;
+ bool redir_ingress; /* undefined if sk_redir is null */
struct sk_msg *cork;
struct sk_psock_progs progs;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
@@ -376,7 +377,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
}
struct sk_psock *sk_psock_init(struct sock *sk, int node);
-void sk_psock_stop(struct sk_psock *psock, bool wait);
+void sk_psock_stop(struct sk_psock *psock);
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0fefdf528e0d..45af70315a94 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -29,6 +29,8 @@
#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U)
/* DEBUG: Poison objects */
#define SLAB_POISON ((slab_flags_t __force)0x00000800U)
+/* Indicate a kmalloc slab */
+#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U)
/* Align objs on cache lines */
#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U)
/* Use GFP_DMA memory */
@@ -74,6 +76,17 @@
* rcu_read_lock before reading the address, then rcu_read_unlock after
* taking the spinlock within the structure expected at that address.
*
+ * Note that it is not possible to acquire a lock within a structure
+ * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
+ * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages
+ * are not zeroed before being given to the slab, which means that any
+ * locks must be initialized after each and every kmem_cache_alloc().
+ * Alternatively, make the ctor passed to kmem_cache_create() initialize
+ * the locks at page-allocation time, as is done in __i915_request_ctor(),
+ * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers
+ * to safely acquire those ctor-initialized locks under rcu_read_lock()
+ * protection.
+ *
* Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
*/
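A sketch of the ctor pattern referenced above, using a hypothetical example_obj cache: the lock is initialized when the slab page is created, so it stays valid across RCU-deferred reuse of the object.

struct example_obj {
	spinlock_t lock;
	struct hlist_node node;
};

static void example_ctor(void *addr)
{
	struct example_obj *obj = addr;

	spin_lock_init(&obj->lock);	/* runs once per object, not per alloc */
}

static struct kmem_cache *example_cache;

static int __init example_cache_init(void)
{
	example_cache = kmem_cache_create("example_obj",
					  sizeof(struct example_obj), 0,
					  SLAB_TYPESAFE_BY_RCU, example_ctor);
	return example_cache ? 0 : -ENOMEM;
}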
/* Defer freeing slabs to RCU */
@@ -106,7 +119,7 @@
# define SLAB_ACCOUNT 0
#endif
-#ifdef CONFIG_KASAN
+#ifdef CONFIG_KASAN_GENERIC
#define SLAB_KASAN ((slab_flags_t __force)0x08000000U)
#else
#define SLAB_KASAN 0
@@ -119,9 +132,19 @@
*/
#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U)
+#ifdef CONFIG_KFENCE
+#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U)
+#else
+#define SLAB_SKIP_KFENCE 0
+#endif
+
/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
+#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
+#else
+#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0)
+#endif
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
/*
@@ -184,11 +207,25 @@ int kmem_cache_shrink(struct kmem_cache *s);
/*
* Common kmalloc functions provided by all allocators
*/
-void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2);
+void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2);
void kfree(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
+
+/**
+ * ksize - Report actual allocation size of associated object
+ *
+ * @objp: Pointer returned from a prior kmalloc()-family allocation.
+ *
+ * This should not be used for writing beyond the originally requested
+ * allocation size. Either use krealloc() or round up the allocation size
+ * with kmalloc_size_roundup() prior to allocation. If this is used to
+ * access beyond the originally requested allocation size, UBSAN_BOUNDS
+ * and/or FORTIFY_SOURCE may trip, since they only know about the
+ * originally allocated size via the __alloc_size attribute.
+ */
size_t ksize(const void *objp);
+
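
(Illustrative sketch, not part of the patch: a legitimate, read-only use of ksize() for reporting allocation slack. The helper name is made up; the point is that the difference is never treated as writable headroom.)

static void show_slack(const void *p, size_t requested)
{
        /* Reporting only: never use this difference as writable headroom;
         * request the larger size up front instead (see
         * kmalloc_size_roundup() below). */
        pr_debug("kmalloc slack: %zu bytes\n", ksize(p) - requested);
}
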
#ifdef CONFIG_PRINTK
bool kmem_valid_obj(void *object);
void kmem_dump_obj(void *object);
@@ -243,27 +280,17 @@ static inline unsigned int arch_slab_minalign(void)
#ifdef CONFIG_SLAB
/*
- * The largest kmalloc size supported by the SLAB allocators is
- * 32 megabyte (2^25) or the maximum allocatable page order if that is
- * less than 32 MB.
- *
- * WARNING: Its not easy to increase this value since the allocators have
- * to do various tricks to work around compiler limitations in order to
- * ensure proper constant folding.
+ * SLAB and SLUB directly allocate requests fitting into an order-1 page
+ * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
*/
-#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
- (MAX_ORDER + PAGE_SHIFT - 1) : 25)
-#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH
+#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
+#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 5
#endif
#endif
#ifdef CONFIG_SLUB
-/*
- * SLUB directly allocates requests fitting in to an order-1 page
- * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
- */
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
#ifndef KMALLOC_SHIFT_LOW
@@ -324,13 +351,18 @@ enum kmalloc_cache_type {
#endif
#ifndef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP = KMALLOC_NORMAL,
-#else
- KMALLOC_CGROUP,
#endif
+#ifdef CONFIG_SLUB_TINY
+ KMALLOC_RECLAIM = KMALLOC_NORMAL,
+#else
KMALLOC_RECLAIM,
+#endif
#ifdef CONFIG_ZONE_DMA
KMALLOC_DMA,
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ KMALLOC_CGROUP,
+#endif
NR_KMALLOC_TYPES
};
@@ -415,10 +447,6 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
if (size <= 512 * 1024) return 19;
if (size <= 1024 * 1024) return 20;
if (size <= 2 * 1024 * 1024) return 21;
- if (size <= 4 * 1024 * 1024) return 22;
- if (size <= 8 * 1024 * 1024) return 23;
- if (size <= 16 * 1024 * 1024) return 24;
- if (size <= 32 * 1024 * 1024) return 25;
if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
@@ -428,11 +456,23 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
/* Will never be reached. Needed because the compiler may complain */
return -1;
}
+static_assert(PAGE_SHIFT <= 20);
#define kmalloc_index(s) __kmalloc_index(s, true)
#endif /* !CONFIG_SLOB */
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
-void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
+
+/**
+ * kmem_cache_alloc - Allocate an object
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ *
+ * Allocate an object from this cache.
+ * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags.
+ *
+ * Return: pointer to the new object or %NULL in case of error
+ */
+void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc;
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *s, void *objp);
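
(Illustrative sketch, not part of the patch: pairing kmem_cache_alloc()/kmem_cache_zalloc() with kmem_cache_free(). The widget struct and cache are hypothetical.)

struct widget {
        int id;
};

static int widget_demo(struct kmem_cache *widget_cache)
{
        struct widget *w;

        /* kmem_cache_zalloc() is the __GFP_ZERO shortcut noted above. */
        w = kmem_cache_zalloc(widget_cache, GFP_KERNEL);
        if (!w)
                return -ENOMEM;

        /* ... use w ... */

        kmem_cache_free(widget_cache, w);
        return 0;
}
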
@@ -456,83 +496,27 @@ static __always_inline void kfree_bulk(size_t size, void **p)
kmem_cache_free_bulk(NULL, size, p);
}
-#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
__alloc_size(1);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
__malloc;
-#else
-static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node)
-{
- return __kmalloc(size, flags);
-}
-static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node)
-{
- return kmem_cache_alloc(s, flags);
-}
-#endif
+void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
+ __assume_kmalloc_alignment __alloc_size(3);
-#ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
- __assume_slab_alignment __alloc_size(3);
-
-#ifdef CONFIG_NUMA
-extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
- int node, size_t size) __assume_slab_alignment
- __alloc_size(4);
-#else
-static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
- gfp_t gfpflags, int node, size_t size)
-{
- return kmem_cache_alloc_trace(s, gfpflags, size);
-}
-#endif /* CONFIG_NUMA */
-
-#else /* CONFIG_TRACING */
-static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s,
- gfp_t flags, size_t size)
-{
- void *ret = kmem_cache_alloc(s, flags);
-
- ret = kasan_kmalloc(s, ret, size, flags);
- return ret;
-}
-
-static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
- int node, size_t size)
-{
- void *ret = kmem_cache_alloc_node(s, gfpflags, node);
-
- ret = kasan_kmalloc(s, ret, size, gfpflags);
- return ret;
-}
-#endif /* CONFIG_TRACING */
-
-extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment
- __alloc_size(1);
-
-#ifdef CONFIG_TRACING
-extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
- __assume_page_alignment __alloc_size(1);
-#else
-static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags,
- unsigned int order)
-{
- return kmalloc_order(size, flags, order);
-}
-#endif
+void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
+ int node, size_t size) __assume_kmalloc_alignment
+ __alloc_size(4);
+void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment
+ __alloc_size(1);
-static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags)
-{
- unsigned int order = get_order(size);
- return kmalloc_order_trace(size, flags, order);
-}
+void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment
+ __alloc_size(1);
/**
- * kmalloc - allocate memory
+ * kmalloc - allocate kernel memory
* @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
+ * @flags: describe the allocation context
*
* kmalloc is the normal method of allocating memory
* for objects smaller than page size in the kernel.
@@ -559,12 +543,12 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
* %GFP_ATOMIC
* Allocation will not sleep. May use emergency pools.
*
- * %GFP_HIGHUSER
- * Allocate memory from high memory on behalf of user.
- *
* Also it is possible to set different flags by OR'ing
* in one or more of the following additional @flags:
*
+ * %__GFP_ZERO
+ * Zero the allocated memory before returning. Also see kzalloc().
+ *
* %__GFP_HIGH
* This allocation has high priority and may use emergency pools.
*
@@ -583,45 +567,57 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
* Try really hard to succeed the allocation but fail
* eventually.
*/
+#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
- if (__builtin_constant_p(size)) {
-#ifndef CONFIG_SLOB
+ if (__builtin_constant_p(size) && size) {
unsigned int index;
-#endif
+
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
-#ifndef CONFIG_SLOB
- index = kmalloc_index(size);
-
- if (!index)
- return ZERO_SIZE_PTR;
- return kmem_cache_alloc_trace(
+ index = kmalloc_index(size);
+ return kmalloc_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, size);
-#endif
}
return __kmalloc(size, flags);
}
+#else
+static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
+{
+ if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
+ return kmalloc_large(size, flags);
+ return __kmalloc(size, flags);
+}
+#endif
+
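
(Illustrative sketch, not part of the patch: combining a base flag with a modifier flag from the kmalloc() documentation above. The struct name is hypothetical; GFP_KERNEL | __GFP_ZERO is what kzalloc() expands to.)

struct state {
        unsigned long flags;
};

static struct state *make_state(void)
{
        /* GFP_KERNEL: may sleep; __GFP_ZERO: memory is returned zeroed. */
        return kmalloc(sizeof(struct state), GFP_KERNEL | __GFP_ZERO);
}
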
+#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
-#ifndef CONFIG_SLOB
- if (__builtin_constant_p(size) &&
- size <= KMALLOC_MAX_CACHE_SIZE) {
- unsigned int i = kmalloc_index(size);
+ if (__builtin_constant_p(size) && size) {
+ unsigned int index;
- if (!i)
- return ZERO_SIZE_PTR;
+ if (size > KMALLOC_MAX_CACHE_SIZE)
+ return kmalloc_large_node(size, flags, node);
- return kmem_cache_alloc_node_trace(
- kmalloc_caches[kmalloc_type(flags)][i],
- flags, node, size);
+ index = kmalloc_index(size);
+ return kmalloc_node_trace(
+ kmalloc_caches[kmalloc_type(flags)][index],
+ flags, node, size);
}
-#endif
return __kmalloc_node(size, flags, node);
}
+#else
+static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+ if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
+ return kmalloc_large_node(size, flags, node);
+
+ return __kmalloc_node(size, flags, node);
+}
+#endif
/**
* kmalloc_array - allocate memory for an array.
@@ -647,10 +643,10 @@ static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_
* @new_size: new size of a single member of the array
* @flags: the type of memory to allocate (see kmalloc)
*/
-static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p,
- size_t new_n,
- size_t new_size,
- gfp_t flags)
+static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p,
+ size_t new_n,
+ size_t new_size,
+ gfp_t flags)
{
size_t bytes;
@@ -671,6 +667,12 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag
return kmalloc_array(n, size, flags | __GFP_ZERO);
}
+void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
+ unsigned long caller) __alloc_size(1);
+#define kmalloc_node_track_caller(size, flags, node) \
+ __kmalloc_node_track_caller(size, flags, node, \
+ _RET_IP_)
+
/*
* kmalloc_track_caller is a special version of kmalloc that records the
* calling function of the routine calling it for slab leak tracking instead
@@ -679,9 +681,9 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag
* allocator where we care about the real place the memory allocation
* request comes from.
*/
-extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller);
#define kmalloc_track_caller(size, flags) \
- __kmalloc_track_caller(size, flags, _RET_IP_)
+ __kmalloc_node_track_caller(size, flags, \
+ NUMA_NO_NODE, _RET_IP_)
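
(Illustrative sketch, not part of the patch: the intended use of kmalloc_track_caller() in a small allocation wrapper, so that leak and debug reports point at the wrapper's caller rather than at the wrapper. The helper name is made up.)

static void *my_zalloc(size_t len)
{
        /* _RET_IP_ inside the macro records my_zalloc()'s caller as the
         * allocation site. */
        return kmalloc_track_caller(len, GFP_KERNEL | __GFP_ZERO);
}
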
static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
int node)
@@ -700,21 +702,6 @@ static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t
return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
}
-
-#ifdef CONFIG_NUMA
-extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
- unsigned long caller) __alloc_size(1);
-#define kmalloc_node_track_caller(size, flags, node) \
- __kmalloc_node_track_caller(size, flags, node, \
- _RET_IP_)
-
-#else /* CONFIG_NUMA */
-
-#define kmalloc_node_track_caller(size, flags, node) \
- kmalloc_track_caller(size, flags)
-
-#endif /* CONFIG_NUMA */
-
/*
* Shortcuts
*/
@@ -774,11 +761,28 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla
}
extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
- __alloc_size(3);
+ __realloc_size(3);
extern void kvfree(const void *addr);
extern void kvfree_sensitive(const void *addr, size_t len);
unsigned int kmem_cache_size(struct kmem_cache *s);
+
+/**
+ * kmalloc_size_roundup - Report allocation bucket size for the given size
+ *
+ * @size: Number of bytes to round up from.
+ *
+ * This returns the number of bytes that would be available in a kmalloc()
+ * allocation of @size bytes. For example, a 126-byte request would be
+ * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly
+ * for the general-purpose kmalloc()-based allocations, and is not for the
+ * pre-sized kmem_cache_alloc()-based allocations.)
+ *
+ * Use this to kmalloc() the full bucket size ahead of time instead of using
+ * ksize() to query the size after an allocation.
+ */
+size_t kmalloc_size_roundup(size_t size);
+
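
(Illustrative sketch, not part of the patch: rounding up before allocating, as the kmalloc_size_roundup() documentation above suggests. The buffer struct and helper are hypothetical.)

struct byte_buf {
        size_t cap;
        u8 *data;
};

static int byte_buf_init(struct byte_buf *b, size_t want)
{
        /* Round up before allocating so the whole bucket is usable and
         * visible to __alloc_size-based checkers. */
        b->cap = kmalloc_size_roundup(want);
        b->data = kmalloc(b->cap, GFP_KERNEL);
        return b->data ? 0 : -ENOMEM;
}
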
void __init kmem_cache_init_late(void);
#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index e24c9aff6fed..5834bad8ad78 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -33,7 +33,6 @@ struct kmem_cache {
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
- struct kmem_cache *freelist_cache;
unsigned int freelist_size;
/* constructor func */
@@ -81,8 +80,10 @@ struct kmem_cache {
unsigned int *random_seq;
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
+#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f9c68a9dac04..aa0ee1678d29 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -41,6 +41,7 @@ enum stat_item {
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS };
+#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
@@ -57,6 +58,7 @@ struct kmem_cache_cpu {
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
+#endif /* CONFIG_SLUB_TINY */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
@@ -88,7 +90,9 @@ struct kmem_cache_order_objects {
* Slab cache management.
*/
struct kmem_cache {
+#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
+#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
@@ -136,13 +140,15 @@ struct kmem_cache {
struct kasan_cache kasan_info;
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
+#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
-#ifdef CONFIG_SYSFS
+#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);
diff --git a/include/linux/smc911x.h b/include/linux/smc911x.h
deleted file mode 100644
index 8cace8189e74..000000000000
--- a/include/linux/smc911x.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __SMC911X_H__
-#define __SMC911X_H__
-
-#define SMC911X_USE_16BIT (1 << 0)
-#define SMC911X_USE_32BIT (1 << 1)
-
-struct smc911x_platdata {
- unsigned long flags;
- unsigned long irq_flags; /* IRQF_... */
- int irq_polarity;
-};
-
-#endif /* __SMC911X_H__ */
diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h
index 88eb832eac7b..c9cabb679cd1 100644
--- a/include/linux/soc/apple/rtkit.h
+++ b/include/linux/soc/apple/rtkit.h
@@ -152,4 +152,16 @@ int apple_rtkit_send_message(struct apple_rtkit *rtk, u8 ep, u64 message,
int apple_rtkit_send_message_wait(struct apple_rtkit *rtk, u8 ep, u64 message,
unsigned long timeout, bool atomic);
+/*
+ * Process incoming messages in atomic context.
+ * This only guarantees that messages arrive as far as the recv_message_early
+ * callback; drivers that expect to handle incoming messages synchronously
+ * when calling this function must therefore do so from that callback.
+ * Will return 1 if some data was processed, 0 if none was, or a
+ * negative error code on failure.
+ *
+ * @rtk: RTKit reference
+ */
+int apple_rtkit_poll(struct apple_rtkit *rtk);
+
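
(Illustrative sketch, not part of the patch: a bounded polling loop built on apple_rtkit_poll(); the retry limit and helper name are purely illustrative.)

static int wait_for_one_message(struct apple_rtkit *rtk)
{
        int ret, tries = 1000;

        do {
                ret = apple_rtkit_poll(rtk);
                if (ret < 0)
                        return ret;     /* hard error */
        } while (ret == 0 && --tries);

        return ret > 0 ? 0 : -ETIMEDOUT;
}
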
#endif /* _LINUX_APPLE_RTKIT_H_ */
diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h
index 59117d970daf..b85f66db33e1 100644
--- a/include/linux/soc/mediatek/mtk-mmsys.h
+++ b/include/linux/soc/mediatek/mtk-mmsys.h
@@ -9,6 +9,13 @@
enum mtk_ddp_comp_id;
struct device;
+enum mtk_dpi_out_format_con {
+ MTK_DPI_RGB888_SDR_CON,
+ MTK_DPI_RGB888_DDR_CON,
+ MTK_DPI_RGB565_SDR_CON,
+ MTK_DPI_RGB565_DDR_CON
+};
+
enum mtk_ddp_comp_id {
DDP_COMPONENT_AAL0,
DDP_COMPONENT_AAL1,
@@ -65,4 +72,6 @@ void mtk_mmsys_ddp_disconnect(struct device *dev,
enum mtk_ddp_comp_id cur,
enum mtk_ddp_comp_id next);
+void mtk_mmsys_ddp_dpi_fmt_config(struct device *dev, u32 val);
+
#endif /* __MTK_MMSYS_H */
diff --git a/include/linux/soc/mediatek/mtk-mutex.h b/include/linux/soc/mediatek/mtk-mutex.h
index a0f4f51a3b45..b335c2837cd8 100644
--- a/include/linux/soc/mediatek/mtk-mutex.h
+++ b/include/linux/soc/mediatek/mtk-mutex.h
@@ -20,6 +20,8 @@ enum mtk_mutex_mod_index {
MUTEX_MOD_IDX_MDP_WDMA,
MUTEX_MOD_IDX_MDP_AAL0,
MUTEX_MOD_IDX_MDP_CCORR0,
+ MUTEX_MOD_IDX_MDP_HDR0,
+ MUTEX_MOD_IDX_MDP_COLOR0,
MUTEX_MOD_IDX_MAX /* ALWAYS keep at the end */
};
diff --git a/include/linux/soc/mediatek/mtk_sip_svc.h b/include/linux/soc/mediatek/mtk_sip_svc.h
index 082398e0cfb1..0761128b4354 100644
--- a/include/linux/soc/mediatek/mtk_sip_svc.h
+++ b/include/linux/soc/mediatek/mtk_sip_svc.h
@@ -22,4 +22,7 @@
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, MTK_SIP_SMC_CONVENTION, \
ARM_SMCCC_OWNER_SIP, fn_id)
+/* IOMMU related SMC call */
+#define MTK_SIP_KERNEL_IOMMU_CONTROL MTK_SIP_SMC_CMD(0x514)
+
#endif
diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index 7e00cca06709..a0746d4aec20 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -5,21 +5,76 @@
#include <linux/rcupdate.h>
#include <linux/regmap.h>
#include <linux/pci.h>
+#include <linux/skbuff.h>
#define MTK_WED_TX_QUEUES 2
+#define MTK_WED_RX_QUEUES 2
+
+#define WED_WO_STA_REC 0x6
struct mtk_wed_hw;
struct mtk_wdma_desc;
+enum mtk_wed_wo_cmd {
+ MTK_WED_WO_CMD_WED_CFG,
+ MTK_WED_WO_CMD_WED_RX_STAT,
+ MTK_WED_WO_CMD_RRO_SER,
+ MTK_WED_WO_CMD_DBG_INFO,
+ MTK_WED_WO_CMD_DEV_INFO,
+ MTK_WED_WO_CMD_BSS_INFO,
+ MTK_WED_WO_CMD_STA_REC,
+ MTK_WED_WO_CMD_DEV_INFO_DUMP,
+ MTK_WED_WO_CMD_BSS_INFO_DUMP,
+ MTK_WED_WO_CMD_STA_REC_DUMP,
+ MTK_WED_WO_CMD_BA_INFO_DUMP,
+ MTK_WED_WO_CMD_FBCMD_Q_DUMP,
+ MTK_WED_WO_CMD_FW_LOG_CTRL,
+ MTK_WED_WO_CMD_LOG_FLUSH,
+ MTK_WED_WO_CMD_CHANGE_STATE,
+ MTK_WED_WO_CMD_CPU_STATS_ENABLE,
+ MTK_WED_WO_CMD_CPU_STATS_DUMP,
+ MTK_WED_WO_CMD_EXCEPTION_INIT,
+ MTK_WED_WO_CMD_PROF_CTRL,
+ MTK_WED_WO_CMD_STA_BA_DUMP,
+ MTK_WED_WO_CMD_BA_CTRL_DUMP,
+ MTK_WED_WO_CMD_RXCNT_CTRL,
+ MTK_WED_WO_CMD_RXCNT_INFO,
+ MTK_WED_WO_CMD_SET_CAP,
+ MTK_WED_WO_CMD_CCIF_RING_DUMP,
+ MTK_WED_WO_CMD_WED_END
+};
+
+struct mtk_rxbm_desc {
+ __le32 buf0;
+ __le32 token;
+} __packed __aligned(4);
+
+enum mtk_wed_bus_tye {
+ MTK_WED_BUS_PCIE,
+ MTK_WED_BUS_AXI,
+};
+
+#define MTK_WED_RING_CONFIGURED BIT(0)
struct mtk_wed_ring {
struct mtk_wdma_desc *desc;
dma_addr_t desc_phys;
+ u32 desc_size;
int size;
+ u32 flags;
u32 reg_base;
void __iomem *wpdma;
};
+struct mtk_wed_wo_rx_stats {
+ __le16 wlan_idx;
+ __le16 tid;
+ __le32 rx_pkt_cnt;
+ __le32 rx_byte_cnt;
+ __le32 rx_err_cnt;
+ __le32 rx_drop_cnt;
+};
+
struct mtk_wed_device {
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
const struct mtk_wed_ops *ops;
@@ -28,30 +83,74 @@ struct mtk_wed_device {
bool init_done, running;
int wdma_idx;
int irq;
+ u8 version;
+
+ /* used by wlan driver */
+ u32 rev_id;
struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES];
+ struct mtk_wed_ring rx_ring[MTK_WED_RX_QUEUES];
struct mtk_wed_ring txfree_ring;
struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES];
+ struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES];
struct {
int size;
void **pages;
struct mtk_wdma_desc *desc;
dma_addr_t desc_phys;
- } buf_ring;
+ } tx_buf_ring;
+
+ struct {
+ int size;
+ struct page_frag_cache rx_page;
+ struct mtk_rxbm_desc *desc;
+ dma_addr_t desc_phys;
+ } rx_buf_ring;
+
+ struct {
+ struct mtk_wed_ring ring;
+ dma_addr_t miod_phys;
+ dma_addr_t fdbk_phys;
+ } rro;
/* filled by driver: */
struct {
- struct pci_dev *pci_dev;
+ union {
+ struct platform_device *platform_dev;
+ struct pci_dev *pci_dev;
+ };
+ enum mtk_wed_bus_tye bus_type;
+ void __iomem *base;
+ u32 phy_base;
u32 wpdma_phys;
+ u32 wpdma_int;
+ u32 wpdma_mask;
+ u32 wpdma_tx;
+ u32 wpdma_txfree;
+ u32 wpdma_rx_glo;
+ u32 wpdma_rx;
+
+ bool wcid_512;
u16 token_start;
unsigned int nbuf;
+ unsigned int rx_nbuf;
+ unsigned int rx_npkt;
+ unsigned int rx_size;
+
+ u8 tx_tbit[MTK_WED_TX_QUEUES];
+ u8 rx_tbit[MTK_WED_RX_QUEUES];
+ u8 txfree_tbit;
u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id);
int (*offload_enable)(struct mtk_wed_device *wed);
void (*offload_disable)(struct mtk_wed_device *wed);
+ u32 (*init_rx_buf)(struct mtk_wed_device *wed, int size);
+ void (*release_rx_buf)(struct mtk_wed_device *wed);
+ void (*update_wo_rx_stats)(struct mtk_wed_device *wed,
+ struct mtk_wed_wo_rx_stats *stats);
} wlan;
#endif
};
@@ -59,10 +158,16 @@ struct mtk_wed_device {
struct mtk_wed_ops {
int (*attach)(struct mtk_wed_device *dev);
int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring,
- void __iomem *regs);
+ void __iomem *regs, bool reset);
+ int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring,
+ void __iomem *regs, bool reset);
int (*txfree_ring_setup)(struct mtk_wed_device *dev,
void __iomem *regs);
+ int (*msg_update)(struct mtk_wed_device *dev, int cmd_id,
+ void *data, int len);
void (*detach)(struct mtk_wed_device *dev);
+ void (*ppe_check)(struct mtk_wed_device *dev, struct sk_buff *skb,
+ u32 reason, u32 hash);
void (*stop)(struct mtk_wed_device *dev);
void (*start)(struct mtk_wed_device *dev, u32 irq_mask);
@@ -97,12 +202,22 @@ mtk_wed_device_attach(struct mtk_wed_device *dev)
return ret;
}
+static inline bool
+mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
+{
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+ return dev->version != 1;
+#else
+ return false;
+#endif
+}
+
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
#define mtk_wed_device_active(_dev) !!(_dev)->ops
#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev)
#define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask)
-#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \
- (_dev)->ops->tx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs, _reset) \
+ (_dev)->ops->tx_ring_setup(_dev, _ring, _regs, _reset)
#define mtk_wed_device_txfree_ring_setup(_dev, _regs) \
(_dev)->ops->txfree_ring_setup(_dev, _regs)
#define mtk_wed_device_reg_read(_dev, _reg) \
@@ -113,6 +228,14 @@ mtk_wed_device_attach(struct mtk_wed_device *dev)
(_dev)->ops->irq_get(_dev, _mask)
#define mtk_wed_device_irq_set_mask(_dev, _mask) \
(_dev)->ops->irq_set_mask(_dev, _mask)
+#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) \
+ (_dev)->ops->rx_ring_setup(_dev, _ring, _regs, _reset)
+#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) \
+ (_dev)->ops->ppe_check(_dev, _skb, _reason, _hash)
+#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \
+ (_dev)->ops->msg_update(_dev, _id, _msg, _len)
+#define mtk_wed_device_stop(_dev) (_dev)->ops->stop(_dev)
+#define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev)
#else
static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
{
@@ -120,12 +243,17 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
}
#define mtk_wed_device_detach(_dev) do {} while (0)
#define mtk_wed_device_start(_dev, _mask) do {} while (0)
-#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV
#define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV
#define mtk_wed_device_reg_read(_dev, _reg) 0
#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0)
#define mtk_wed_device_irq_get(_dev, _mask) 0
#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0)
+#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV
+#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0)
+#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV
+#define mtk_wed_device_stop(_dev) do {} while (0)
+#define mtk_wed_device_dma_reset(_dev) do {} while (0)
#endif
#endif
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 9ed5384c5ca1..ad1fd718169d 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -42,7 +42,19 @@
#define LLCC_CPUHWT 36
#define LLCC_MDMCLAD2 37
#define LLCC_CAMEXP1 38
+#define LLCC_CMPTHCP 39
+#define LLCC_LCPDARE 40
#define LLCC_AENPU 45
+#define LLCC_ISLAND1 46
+#define LLCC_ISLAND2 47
+#define LLCC_ISLAND3 48
+#define LLCC_ISLAND4 49
+#define LLCC_CAMEXP2 50
+#define LLCC_CAMEXP3 51
+#define LLCC_CAMEXP4 52
+#define LLCC_DISP_WB 53
+#define LLCC_DISP_1 54
+#define LLCC_VIDVSP 64
/**
* struct llcc_slice_desc - Cache slice descriptor
@@ -78,11 +90,40 @@ struct llcc_edac_reg_data {
u8 ways_shift;
};
+struct llcc_edac_reg_offset {
+ /* LLCC TRP registers */
+ u32 trp_ecc_error_status0;
+ u32 trp_ecc_error_status1;
+ u32 trp_ecc_sb_err_syn0;
+ u32 trp_ecc_db_err_syn0;
+ u32 trp_ecc_error_cntr_clear;
+ u32 trp_interrupt_0_status;
+ u32 trp_interrupt_0_clear;
+ u32 trp_interrupt_0_enable;
+
+ /* LLCC Common registers */
+ u32 cmn_status0;
+ u32 cmn_interrupt_0_enable;
+ u32 cmn_interrupt_2_enable;
+
+ /* LLCC DRP registers */
+ u32 drp_ecc_error_cfg;
+ u32 drp_ecc_error_cntr_clear;
+ u32 drp_interrupt_status;
+ u32 drp_interrupt_clear;
+ u32 drp_interrupt_enable;
+ u32 drp_ecc_error_status0;
+ u32 drp_ecc_error_status1;
+ u32 drp_ecc_sb_err_syn0;
+ u32 drp_ecc_db_err_syn0;
+};
+
/**
* struct llcc_drv_data - Data associated with the llcc driver
* @regmap: regmap associated with the llcc device
* @bcast_regmap: regmap associated with llcc broadcast offset
* @cfg: pointer to the data structure for slice configuration
+ * @edac_reg_offset: Offset of the LLCC EDAC registers
* @lock: mutex associated with each slice
* @cfg_size: size of the config data table
* @max_slices: max slices as read from device tree
@@ -96,6 +137,7 @@ struct llcc_drv_data {
struct regmap *regmap;
struct regmap *bcast_regmap;
const struct llcc_slice_config *cfg;
+ const struct llcc_edac_reg_offset *edac_reg_offset;
struct mutex lock;
u32 cfg_size;
u32 max_slices;
diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h
index b1f80e756d2a..469e02d2aa0d 100644
--- a/include/linux/soc/qcom/qmi.h
+++ b/include/linux/soc/qcom/qmi.h
@@ -75,7 +75,7 @@ struct qmi_elem_info {
enum qmi_array_type array_type;
u8 tlv_type;
u32 offset;
- struct qmi_elem_info *ei_array;
+ const struct qmi_elem_info *ei_array;
};
#define QMI_RESULT_SUCCESS_V01 0
@@ -102,7 +102,7 @@ struct qmi_response_type_v01 {
u16 error;
};
-extern struct qmi_elem_info qmi_response_type_v01_ei[];
+extern const struct qmi_elem_info qmi_response_type_v01_ei[];
/**
* struct qmi_service - context to track lookup-results
@@ -173,7 +173,7 @@ struct qmi_txn {
struct completion completion;
int result;
- struct qmi_elem_info *ei;
+ const struct qmi_elem_info *ei;
void *dest;
};
@@ -189,7 +189,7 @@ struct qmi_msg_handler {
unsigned int type;
unsigned int msg_id;
- struct qmi_elem_info *ei;
+ const struct qmi_elem_info *ei;
size_t decoded_size;
void (*fn)(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
@@ -249,23 +249,23 @@ void qmi_handle_release(struct qmi_handle *qmi);
ssize_t qmi_send_request(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
struct qmi_txn *txn, int msg_id, size_t len,
- struct qmi_elem_info *ei, const void *c_struct);
+ const struct qmi_elem_info *ei, const void *c_struct);
ssize_t qmi_send_response(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
struct qmi_txn *txn, int msg_id, size_t len,
- struct qmi_elem_info *ei, const void *c_struct);
+ const struct qmi_elem_info *ei, const void *c_struct);
ssize_t qmi_send_indication(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
- int msg_id, size_t len, struct qmi_elem_info *ei,
+ int msg_id, size_t len, const struct qmi_elem_info *ei,
const void *c_struct);
void *qmi_encode_message(int type, unsigned int msg_id, size_t *len,
- unsigned int txn_id, struct qmi_elem_info *ei,
+ unsigned int txn_id, const struct qmi_elem_info *ei,
const void *c_struct);
int qmi_decode_message(const void *buf, size_t len,
- struct qmi_elem_info *ei, void *c_struct);
+ const struct qmi_elem_info *ei, void *c_struct);
int qmi_txn_init(struct qmi_handle *qmi, struct qmi_txn *txn,
- struct qmi_elem_info *ei, void *c_struct);
+ const struct qmi_elem_info *ei, void *c_struct);
int qmi_txn_wait(struct qmi_txn *txn, unsigned long timeout);
void qmi_txn_cancel(struct qmi_txn *txn);
diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h
index 82c9d489833a..62de54992e49 100644
--- a/include/linux/soc/qcom/smd-rpm.h
+++ b/include/linux/soc/qcom/smd-rpm.h
@@ -19,6 +19,7 @@ struct qcom_smd_rpm;
#define QCOM_SMD_RPM_CLK_BUF_A 0x616B6C63
#define QCOM_SMD_RPM_LDOA 0x616f646c
#define QCOM_SMD_RPM_LDOB 0x626F646C
+#define QCOM_SMD_RPM_LDOE 0x656f646c
#define QCOM_SMD_RPM_RWCX 0x78637772
#define QCOM_SMD_RPM_RWMX 0x786d7772
#define QCOM_SMD_RPM_RWLC 0x636c7772
@@ -32,6 +33,7 @@ struct qcom_smd_rpm;
#define QCOM_SMD_RPM_QUP_CLK 0x707571
#define QCOM_SMD_RPM_SMPA 0x61706d73
#define QCOM_SMD_RPM_SMPB 0x62706d73
+#define QCOM_SMD_RPM_SMPE 0x65706d73
#define QCOM_SMD_RPM_SPDM 0x63707362
#define QCOM_SMD_RPM_VSA 0x00617376
#define QCOM_SMD_RPM_MMAXI_CLK 0x69786d6d
@@ -41,6 +43,7 @@ struct qcom_smd_rpm;
#define QCOM_SMD_RPM_HWKM_CLK 0x6d6b7768
#define QCOM_SMD_RPM_PKA_CLK 0x616b70
#define QCOM_SMD_RPM_MCFG_CLK 0x6766636d
+#define QCOM_SMD_RPM_MMXI_CLK 0x69786d6d
int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm,
int state,
diff --git a/include/linux/soc/sunxi/sunxi_sram.h b/include/linux/soc/sunxi/sunxi_sram.h
index c5f663bba9c2..60e274d1b821 100644
--- a/include/linux/soc/sunxi/sunxi_sram.h
+++ b/include/linux/soc/sunxi/sunxi_sram.h
@@ -14,6 +14,6 @@
#define _SUNXI_SRAM_H_
int sunxi_sram_claim(struct device *dev);
-int sunxi_sram_release(struct device *dev);
+void sunxi_sram_release(struct device *dev);
#endif /* _SUNXI_SRAM_H_ */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index de3701a2a212..13c3a237b9c9 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t;
struct sockaddr {
sa_family_t sa_family; /* address family, AF_xxx */
- char sa_data[14]; /* 14 bytes of protocol address */
+ union {
+ char sa_data_min[14]; /* Minimum 14 bytes of protocol address */
+ DECLARE_FLEX_ARRAY(char, sa_data);
+ };
};
struct linger {
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index d45902fb4cad..bae5e2369b4f 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
return 0;
}
+static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
+{
+ return copy_to_sockptr_offset(dst, 0, src, size);
+}
+
static inline void *memdup_sockptr(sockptr_t src, size_t len)
{
void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 39058c841469..9e4537f409c2 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -839,6 +839,8 @@ struct sdw_defer {
* @set_bus_conf: Set the bus configuration
* @pre_bank_switch: Callback for pre bank switch
* @post_bank_switch: Callback for post bank switch
+ * @read_ping_status: Read status from PING frames, reported with two bits per Device.
+ * Bits 31:24 are reserved.
*/
struct sdw_master_ops {
int (*read_prop)(struct sdw_bus *bus);
@@ -855,6 +857,7 @@ struct sdw_master_ops {
struct sdw_bus_params *params);
int (*pre_bank_switch)(struct sdw_bus *bus);
int (*post_bank_switch)(struct sdw_bus *bus);
+ u32 (*read_ping_status)(struct sdw_bus *bus);
};
@@ -889,6 +892,9 @@ struct sdw_master_ops {
* meaningful if multi_link is set. If set to 1, hardware-based
* synchronization will be used even if a stream only uses a single
* SoundWire segment.
+ * @dev_num_ida_min: if set, defines the minimum value for the IDA
+ * used to allocate system-unique device numbers. This value needs to be
+ * identical across all SoundWire buses in the system.
*/
struct sdw_bus {
struct device *dev;
@@ -913,12 +919,15 @@ struct sdw_bus {
u32 bank_switch_timeout;
bool multi_link;
int hw_sync_min_links;
+ int dev_num_ida_min;
};
int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
struct fwnode_handle *fwnode);
void sdw_bus_master_delete(struct sdw_bus *bus);
+void sdw_show_ping_status(struct sdw_bus *bus, bool sync_delay);
+
/**
* sdw_port_config: Master or Slave Port configuration
*
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index ec16ae49e6a4..d2f581feed67 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -15,32 +15,21 @@
#define SDW_LINK_SIZE 0x10000
/* Intel SHIM Registers Definition */
+/* LCAP */
#define SDW_SHIM_LCAP 0x0
-#define SDW_SHIM_LCTL 0x4
-#define SDW_SHIM_IPPTR 0x8
-#define SDW_SHIM_SYNC 0xC
-
-#define SDW_SHIM_CTLSCAP(x) (0x010 + 0x60 * (x))
-#define SDW_SHIM_CTLS0CM(x) (0x012 + 0x60 * (x))
-#define SDW_SHIM_CTLS1CM(x) (0x014 + 0x60 * (x))
-#define SDW_SHIM_CTLS2CM(x) (0x016 + 0x60 * (x))
-#define SDW_SHIM_CTLS3CM(x) (0x018 + 0x60 * (x))
-#define SDW_SHIM_PCMSCAP(x) (0x020 + 0x60 * (x))
+#define SDW_SHIM_LCAP_LCOUNT_MASK GENMASK(2, 0)
-#define SDW_SHIM_PCMSYCHM(x, y) (0x022 + (0x60 * (x)) + (0x2 * (y)))
-#define SDW_SHIM_PCMSYCHC(x, y) (0x042 + (0x60 * (x)) + (0x2 * (y)))
-#define SDW_SHIM_PDMSCAP(x) (0x062 + 0x60 * (x))
-#define SDW_SHIM_IOCTL(x) (0x06C + 0x60 * (x))
-#define SDW_SHIM_CTMCTL(x) (0x06E + 0x60 * (x))
-
-#define SDW_SHIM_WAKEEN 0x190
-#define SDW_SHIM_WAKESTS 0x192
+/* LCTL */
+#define SDW_SHIM_LCTL 0x4
#define SDW_SHIM_LCTL_SPA BIT(0)
#define SDW_SHIM_LCTL_SPA_MASK GENMASK(3, 0)
#define SDW_SHIM_LCTL_CPA BIT(8)
#define SDW_SHIM_LCTL_CPA_MASK GENMASK(11, 8)
+/* SYNC */
+#define SDW_SHIM_SYNC 0xC
+
#define SDW_SHIM_SYNC_SYNCPRD_VAL_24 (24000 / SDW_CADENCE_GSYNC_KHZ - 1)
#define SDW_SHIM_SYNC_SYNCPRD_VAL_38_4 (38400 / SDW_CADENCE_GSYNC_KHZ - 1)
#define SDW_SHIM_SYNC_SYNCPRD GENMASK(14, 0)
@@ -49,19 +38,33 @@
#define SDW_SHIM_SYNC_CMDSYNC BIT(16)
#define SDW_SHIM_SYNC_SYNCGO BIT(24)
+/* Control stream capabilities and channel mask */
+#define SDW_SHIM_CTLSCAP(x) (0x010 + 0x60 * (x))
+#define SDW_SHIM_CTLS0CM(x) (0x012 + 0x60 * (x))
+#define SDW_SHIM_CTLS1CM(x) (0x014 + 0x60 * (x))
+#define SDW_SHIM_CTLS2CM(x) (0x016 + 0x60 * (x))
+#define SDW_SHIM_CTLS3CM(x) (0x018 + 0x60 * (x))
+
+/* PCM Stream capabilities */
+#define SDW_SHIM_PCMSCAP(x) (0x020 + 0x60 * (x))
+
#define SDW_SHIM_PCMSCAP_ISS GENMASK(3, 0)
#define SDW_SHIM_PCMSCAP_OSS GENMASK(7, 4)
#define SDW_SHIM_PCMSCAP_BSS GENMASK(12, 8)
+/* PCM Stream Channel Map */
+#define SDW_SHIM_PCMSYCHM(x, y) (0x022 + (0x60 * (x)) + (0x2 * (y)))
+
+/* PCM Stream Channel Count */
+#define SDW_SHIM_PCMSYCHC(x, y) (0x042 + (0x60 * (x)) + (0x2 * (y)))
+
#define SDW_SHIM_PCMSYCM_LCHN GENMASK(3, 0)
#define SDW_SHIM_PCMSYCM_HCHN GENMASK(7, 4)
#define SDW_SHIM_PCMSYCM_STREAM GENMASK(13, 8)
#define SDW_SHIM_PCMSYCM_DIR BIT(15)
-#define SDW_SHIM_PDMSCAP_ISS GENMASK(3, 0)
-#define SDW_SHIM_PDMSCAP_OSS GENMASK(7, 4)
-#define SDW_SHIM_PDMSCAP_BSS GENMASK(12, 8)
-#define SDW_SHIM_PDMSCAP_CPSS GENMASK(15, 13)
+/* IO control */
+#define SDW_SHIM_IOCTL(x) (0x06C + 0x60 * (x))
#define SDW_SHIM_IOCTL_MIF BIT(0)
#define SDW_SHIM_IOCTL_CO BIT(1)
@@ -73,13 +76,23 @@
#define SDW_SHIM_IOCTL_CIBD BIT(8)
#define SDW_SHIM_IOCTL_DIBD BIT(9)
-#define SDW_SHIM_CTMCTL_DACTQE BIT(0)
-#define SDW_SHIM_CTMCTL_DODS BIT(1)
-#define SDW_SHIM_CTMCTL_DOAIS GENMASK(4, 3)
+/* Wake Enable */
+#define SDW_SHIM_WAKEEN 0x190
#define SDW_SHIM_WAKEEN_ENABLE BIT(0)
+
+/* Wake Status */
+#define SDW_SHIM_WAKESTS 0x192
+
#define SDW_SHIM_WAKESTS_STATUS BIT(0)
+/* AC Timing control */
+#define SDW_SHIM_CTMCTL(x) (0x06E + 0x60 * (x))
+
+#define SDW_SHIM_CTMCTL_DACTQE BIT(0)
+#define SDW_SHIM_CTMCTL_DODS BIT(1)
+#define SDW_SHIM_CTMCTL_DOAIS GENMASK(4, 3)
+
/* Intel ALH Register definitions */
#define SDW_ALH_STRMZCFG(x) (0x000 + (0x4 * (x)))
#define SDW_ALH_NUM_STREAMS 64
@@ -273,8 +286,6 @@ int sdw_intel_startup(struct sdw_intel_ctx *ctx);
void sdw_intel_exit(struct sdw_intel_ctx *ctx);
-void sdw_intel_enable_irq(void __iomem *mmio_base, bool enable);
-
irqreturn_t sdw_intel_thread(int irq, void *dev_id);
#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1)
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 2ba044d0d5e5..8e984d75f5b6 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -225,7 +225,7 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem)
/**
* struct spi_controller_mem_ops - SPI memory operations
* @adjust_op_size: shrink the data xfer of an operation to match controller's
- * limitations (can be alignment of max RX/TX size
+ * limitations (can be alignment or max RX/TX size
* limitations)
* @supports_op: check if an operation is supported by the controller
* @exec_op: execute a SPI memory operation
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e6c73d5ff1a8..9a32495fbb1f 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -356,6 +356,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* @max_speed_hz: Highest supported transfer speed
* @flags: other constraints relevant to this driver
* @slave: indicates that this is an SPI slave controller
+ * @target: indicates that this is an SPI target controller
* @devm_allocated: whether the allocation of this struct is devres-managed
* @max_transfer_size: function that returns the max transfer size for
* a &spi_device; may be %NULL, so the default %SIZE_MAX will be used.
@@ -378,6 +379,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* @cleanup: frees controller-specific state
* @can_dma: determine whether this controller supports DMA
* @dma_map_dev: device which can be used for DMA mapping
+ * @cur_rx_dma_dev: device which is currently used for RX DMA mapping
+ * @cur_tx_dma_dev: device which is currently used for TX DMA mapping
* @queued: whether this controller is providing an internal message queue
* @kworker: pointer to thread struct for message pump
* @pump_messages: work struct for scheduling work to the message pump
@@ -438,6 +441,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* @mem_caps: controller capabilities for the handling of memory operations.
* @unprepare_message: undo any work done by prepare_message().
* @slave_abort: abort the ongoing transfer request on an SPI slave controller
+ * @target_abort: abort the ongoing transfer request on an SPI target controller
* @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS
* number. Any individual value may be NULL for CS lines that
* are not GPIOs (driven by the SPI controller itself).
@@ -469,6 +473,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* SPI_TRANS_FAIL_NO_START.
* @queue_empty: signal green light for opportunistically skipping the queue
* for spi_sync transfers.
+ * @must_async: disable all fast paths in the core
*
* Each SPI controller can communicate with one or more @spi_device
* children. These make a small bus, sharing MOSI, MISO and SCK signals
@@ -532,8 +537,12 @@ struct spi_controller {
/* Flag indicating if the allocation of this struct is devres-managed */
bool devm_allocated;
- /* Flag indicating this is an SPI slave controller */
- bool slave;
+ union {
+ /* Flag indicating this is an SPI slave controller */
+ bool slave;
+ /* Flag indicating this is an SPI target controller */
+ bool target;
+ };
/*
* on some hardware transfer / message size may be constrained
@@ -609,6 +618,8 @@ struct spi_controller {
struct spi_device *spi,
struct spi_transfer *xfer);
struct device *dma_map_dev;
+ struct device *cur_rx_dma_dev;
+ struct device *cur_tx_dma_dev;
/*
* These hooks are for drivers that want to use the generic
@@ -644,7 +655,10 @@ struct spi_controller {
struct spi_message *message);
int (*unprepare_message)(struct spi_controller *ctlr,
struct spi_message *message);
- int (*slave_abort)(struct spi_controller *ctlr);
+ union {
+ int (*slave_abort)(struct spi_controller *ctlr);
+ int (*target_abort)(struct spi_controller *ctlr);
+ };
/*
* These hooks are for drivers that use a generic implementation
@@ -690,6 +704,7 @@ struct spi_controller {
/* Flag for enabling opportunistic skipping of the queue in spi_sync */
bool queue_empty;
+ bool must_async;
};
static inline void *spi_controller_get_devdata(struct spi_controller *ctlr)
@@ -721,6 +736,11 @@ static inline bool spi_controller_is_slave(struct spi_controller *ctlr)
return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave;
}
+static inline bool spi_controller_is_target(struct spi_controller *ctlr)
+{
+ return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target;
+}
+
/* PM calls that need to be issued by the driver */
extern int spi_controller_suspend(struct spi_controller *ctlr);
extern int spi_controller_resume(struct spi_controller *ctlr);
@@ -757,6 +777,21 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host,
return __spi_alloc_controller(host, size, true);
}
+static inline struct spi_controller *spi_alloc_host(struct device *dev,
+ unsigned int size)
+{
+ return __spi_alloc_controller(dev, size, false);
+}
+
+static inline struct spi_controller *spi_alloc_target(struct device *dev,
+ unsigned int size)
+{
+ if (!IS_ENABLED(CONFIG_SPI_SLAVE))
+ return NULL;
+
+ return __spi_alloc_controller(dev, size, true);
+}
+
struct spi_controller *__devm_spi_alloc_controller(struct device *dev,
unsigned int size,
bool slave);
@@ -776,6 +811,21 @@ static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev,
return __devm_spi_alloc_controller(dev, size, true);
}
+static inline struct spi_controller *devm_spi_alloc_host(struct device *dev,
+ unsigned int size)
+{
+ return __devm_spi_alloc_controller(dev, size, false);
+}
+
+static inline struct spi_controller *devm_spi_alloc_target(struct device *dev,
+ unsigned int size)
+{
+ if (!IS_ENABLED(CONFIG_SPI_SLAVE))
+ return NULL;
+
+ return __devm_spi_alloc_controller(dev, size, true);
+}
+
extern int spi_register_controller(struct spi_controller *ctlr);
extern int devm_spi_register_controller(struct device *dev,
struct spi_controller *ctlr);
@@ -846,6 +896,7 @@ struct spi_res {
* @bits_per_word: select a bits_per_word other than the device default
* for this transfer. If 0 the default (from @spi_device) is used.
* @dummy_data: indicates transfer is dummy bytes transfer.
+ * @cs_off: performs the transfer with chipselect off.
* @cs_change: affects chipselect after this transfer completes
* @cs_change_delay: delay between cs deassert and assert when
* @cs_change is set and @spi_transfer is not the last in @spi_message
@@ -956,6 +1007,7 @@ struct spi_transfer {
struct sg_table rx_sg;
unsigned dummy_data:1;
+ unsigned cs_off:1;
unsigned cs_change:1;
unsigned tx_nbits:3;
unsigned rx_nbits:3;
@@ -1133,6 +1185,7 @@ static inline void spi_message_free(struct spi_message *m)
extern int spi_setup(struct spi_device *spi);
extern int spi_async(struct spi_device *spi, struct spi_message *message);
extern int spi_slave_abort(struct spi_device *spi);
+extern int spi_target_abort(struct spi_device *spi);
static inline size_t
spi_max_message_size(struct spi_device *spi)
@@ -1506,6 +1559,9 @@ extern void spi_unregister_device(struct spi_device *spi);
extern const struct spi_device_id *
spi_get_device_id(const struct spi_device *sdev);
+extern const void *
+spi_get_device_match_data(const struct spi_device *sdev);
+
static inline bool
spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer)
{
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 5c0c5174155d..1341f7d62da4 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SPINLOCK_H
#define __LINUX_SPINLOCK_H
+#define __LINUX_INSIDE_SPINLOCK_H
/*
* include/linux/spinlock.h - generic spinlock/rwlock declarations
@@ -492,4 +493,5 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
void free_bucket_spinlocks(spinlock_t *locks);
+#undef __LINUX_INSIDE_SPINLOCK_H
#endif /* __LINUX_SPINLOCK_H */
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 51fa0dab68c4..89eb6f4c659c 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -1,7 +1,7 @@
#ifndef __LINUX_SPINLOCK_API_SMP_H
#define __LINUX_SPINLOCK_API_SMP_H
-#ifndef __LINUX_SPINLOCK_H
+#ifndef __LINUX_INSIDE_SPINLOCK_H
# error "please don't include this file directly"
#endif
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index b8ba00ccccde..819aeba1c87e 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -1,7 +1,7 @@
#ifndef __LINUX_SPINLOCK_API_UP_H
#define __LINUX_SPINLOCK_API_UP_H
-#ifndef __LINUX_SPINLOCK_H
+#ifndef __LINUX_INSIDE_SPINLOCK_H
# error "please don't include this file directly"
#endif
diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index 835aedaf68ac..61c49b16f69a 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -2,7 +2,7 @@
#ifndef __LINUX_SPINLOCK_RT_H
#define __LINUX_SPINLOCK_RT_H
-#ifndef __LINUX_SPINLOCK_H
+#ifndef __LINUX_INSIDE_SPINLOCK_H
#error Do not include directly. Use spinlock.h
#endif
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 16521074b6f7..c87204247592 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -1,7 +1,7 @@
#ifndef __LINUX_SPINLOCK_UP_H
#define __LINUX_SPINLOCK_UP_H
-#ifndef __LINUX_SPINLOCK_H
+#ifndef __LINUX_INSIDE_SPINLOCK_H
# error "please don't include this file directly"
#endif
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 01226e4d960a..9b9d0bbf1d3c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -47,11 +47,8 @@ int init_srcu_struct(struct srcu_struct *ssp);
#include <linux/srcutiny.h>
#elif defined(CONFIG_TREE_SRCU)
#include <linux/srcutree.h>
-#elif defined(CONFIG_SRCU)
-#error "Unknown SRCU implementation specified to kernel configuration"
#else
-/* Dummy definition for things like notifiers. Actual use gets link error. */
-struct srcu_struct { };
+#error "Unknown SRCU implementation specified to kernel configuration"
#endif
void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
@@ -64,11 +61,21 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
-#ifdef CONFIG_SRCU
+#ifdef CONFIG_NEED_SRCU_NMI_SAFE
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
+#else
+static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+ return __srcu_read_lock(ssp);
+}
+static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+ __srcu_read_unlock(ssp, idx);
+}
+#endif /* CONFIG_NEED_SRCU_NMI_SAFE */
+
void srcu_init(void);
-#else /* #ifdef CONFIG_SRCU */
-static inline void srcu_init(void) { }
-#endif /* #else #ifdef CONFIG_SRCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -104,6 +111,18 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#define SRCU_NMI_UNKNOWN 0x0
+#define SRCU_NMI_UNSAFE 0x1
+#define SRCU_NMI_SAFE 0x2
+
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
+void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe);
+#else
+static inline void srcu_check_nmi_safety(struct srcu_struct *ssp,
+ bool nmi_safe) { }
+#endif
+
+
/**
* srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
* @p: the pointer to fetch and protect for later dereferencing
@@ -161,17 +180,36 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
+ srcu_check_nmi_safety(ssp, false);
retval = __srcu_read_lock(ssp);
rcu_lock_acquire(&(ssp)->dep_map);
return retval;
}
+/**
+ * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but in an NMI-safe manner.
+ * See srcu_read_lock() for more information.
+ */
+static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
+{
+ int retval;
+
+ srcu_check_nmi_safety(ssp, true);
+ retval = __srcu_read_lock_nmisafe(ssp);
+ rcu_lock_acquire(&(ssp)->dep_map);
+ return retval;
+}
+
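
(Illustrative sketch, not part of the patch: an NMI-context reader using the nmisafe variants. The srcu_struct, protected pointer, and handle() call are made-up names. Note that a given srcu_struct should be used consistently with either the plain or the _nmisafe() readers, which is what srcu_check_nmi_safety() verifies.)

DEFINE_SRCU(my_srcu);
static struct my_data __rcu *my_data_ptr;

static void my_nmi_handler(void)
{
        struct my_data *p;
        int idx;

        idx = srcu_read_lock_nmisafe(&my_srcu);
        p = srcu_dereference(my_data_ptr, &my_srcu);
        if (p)
                handle(p);              /* illustrative */
        srcu_read_unlock_nmisafe(&my_srcu, idx);
}
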
/* Used by tracing, cannot be traced and cannot invoke lockdep. */
static inline notrace int
srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
+ srcu_check_nmi_safety(ssp, false);
retval = __srcu_read_lock(ssp);
return retval;
}
@@ -187,14 +225,32 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
__releases(ssp)
{
WARN_ON_ONCE(idx & ~0x1);
+ srcu_check_nmi_safety(ssp, false);
rcu_lock_release(&(ssp)->dep_map);
__srcu_read_unlock(ssp, idx);
}
+/**
+ * srcu_read_unlock_nmisafe - unregister an old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit an SRCU read-side critical section, but in an NMI-safe manner.
+ */
+static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+ __releases(ssp)
+{
+ WARN_ON_ONCE(idx & ~0x1);
+ srcu_check_nmi_safety(ssp, true);
+ rcu_lock_release(&(ssp)->dep_map);
+ __srcu_read_unlock_nmisafe(ssp, idx);
+}
+
/* Used by tracing, cannot be traced and cannot call lockdep. */
static inline notrace void
srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
{
+ srcu_check_nmi_safety(ssp, false);
__srcu_read_unlock(ssp, idx);
}
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 6cfaa0a9a9b9..5aa5e0faf6a1 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -15,10 +15,10 @@
struct srcu_struct {
short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */
- unsigned short srcu_idx; /* Current reader array element in bit 0x2. */
- unsigned short srcu_idx_max; /* Furthest future srcu_idx request. */
u8 srcu_gp_running; /* GP workqueue running? */
u8 srcu_gp_waiting; /* GP waiting for readers? */
+ unsigned long srcu_idx; /* Current reader array element in bit 0x2. */
+ unsigned long srcu_idx_max; /* Furthest future srcu_idx request. */
struct swait_queue_head srcu_wq;
/* Last srcu_read_unlock() wakes GP. */
struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */
@@ -82,10 +82,12 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
int idx;
idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1;
- pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
+ pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd) gp: %lu->%lu\n",
tt, tf, idx,
data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])),
- data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])));
+ data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])),
+ data_race(READ_ONCE(ssp->srcu_idx)),
+ data_race(READ_ONCE(ssp->srcu_idx_max)));
}
#endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index e3014319d1ad..c689a81752c9 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -23,8 +23,9 @@ struct srcu_struct;
*/
struct srcu_data {
/* Read-side state. */
- unsigned long srcu_lock_count[2]; /* Locks per CPU. */
- unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */
+ atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */
+ atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */
+ int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */
/* Update-side state. */
spinlock_t __private lock ____cacheline_internodealigned_in_smp;
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index bc2797955de9..9ca7798d7a31 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -14,9 +14,15 @@
#include <linux/gfp.h>
typedef u32 depot_stack_handle_t;
+/*
+ * Number of bits in the handle that stack depot doesn't use. Users may store
+ * information in them.
+ */
+#define STACK_DEPOT_EXTRA_BITS 5
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
unsigned int nr_entries,
+ unsigned int extra_bits,
gfp_t gfp_flags, bool can_alloc);
/*
@@ -59,6 +65,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned long **entries);
+unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle);
+
int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
int spaces);
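
(Illustrative sketch, not part of the patch: saving a trace with a small tag stored in the spare handle bits and reading it back. The helper name is made up; only STACK_DEPOT_EXTRA_BITS (5) bits of the tag fit.)

#include <linux/stacktrace.h>
#include <linux/stackdepot.h>

static void save_with_tag(unsigned int tag)
{
        unsigned long entries[16];
        unsigned int nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
        depot_stack_handle_t handle;

        handle = __stack_depot_save(entries, nr, tag & 0x1f,
                                    GFP_NOWAIT, false);
        if (handle)
                pr_info("stored tag %u\n", stack_depot_get_extra_bits(handle));
}
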
diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h
index 4c678c4fec58..9c88707d9a0f 100644
--- a/include/linux/stackprotector.h
+++ b/include/linux/stackprotector.h
@@ -6,6 +6,25 @@
#include <linux/sched.h>
#include <linux/random.h>
+/*
+ * On 64-bit architectures, protect against non-terminated C string overflows
+ * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ */
+#ifdef CONFIG_64BIT
+# ifdef __LITTLE_ENDIAN
+# define CANARY_MASK 0xffffffffffffff00UL
+# else /* big endian, 64 bits: */
+# define CANARY_MASK 0x00ffffffffffffffUL
+# endif
+#else /* 32 bits: */
+# define CANARY_MASK 0xffffffffUL
+#endif
+
+static inline unsigned long get_random_canary(void)
+{
+ return get_random_long() & CANARY_MASK;
+}
+
#if defined(CONFIG_STACKPROTECTOR) || defined(CONFIG_ARM64_PTR_AUTH)
# include <asm/stackprotector.h>
#else
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 7df06931f25d..ff277ced50e9 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -50,6 +50,8 @@ struct kstat {
struct timespec64 btime; /* File creation time */
u64 blocks;
u64 mnt_id;
+ u32 dio_mem_align;
+ u32 dio_offset_align;
};
#endif
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 8df475db88c0..83ca2e8eb6b5 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -257,7 +257,6 @@ struct plat_stmmacenet_data {
u8 vlan_fail_q;
unsigned int eee_usecs_rate;
struct pci_dev *pdev;
- bool has_crossts;
int int_snapshot_num;
int ext_snapshot_num;
bool int_snapshot_en;
@@ -272,5 +271,6 @@ struct plat_stmmacenet_data {
int msi_tx_base_vec;
bool use_phy_wol;
bool sph_disable;
+ bool serdes_up_after_phy_linkup;
};
#endif
diff --git a/include/linux/string.h b/include/linux/string.h
index 61ec7e4f6311..db28802ab0a6 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -176,7 +176,7 @@ extern void kfree_const(const void *x);
extern char *kstrdup(const char *s, gfp_t gfp) __malloc;
extern const char *kstrdup_const(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
-extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
+extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
@@ -261,6 +261,49 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
int pad);
/**
+ * strtomem_pad - Copy NUL-terminated string to non-NUL-terminated buffer
+ *
+ * @dest: Pointer of destination character array (marked as __nonstring)
+ * @src: Pointer to NUL-terminated string
+ * @pad: Padding character to fill any remaining bytes of @dest after copy
+ *
+ * This is a replacement for strncpy() uses where the destination is not
+ * a NUL-terminated string, but with bounds checking on the source size, and
+ * an explicit padding character. If padding is not required, use strtomem().
+ *
+ * Note that the size of @dest is not an argument, as the length of @dest
+ * must be discoverable by the compiler.
+ */
+#define strtomem_pad(dest, src, pad) do { \
+ const size_t _dest_len = __builtin_object_size(dest, 1); \
+ \
+ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
+ _dest_len == (size_t)-1); \
+ memcpy_and_pad(dest, _dest_len, src, strnlen(src, _dest_len), pad); \
+} while (0)
+
+/**
+ * strtomem - Copy NUL-terminated string to non-NUL-terminated buffer
+ *
+ * @dest: Pointer of destination character array (marked as __nonstring)
+ * @src: Pointer to NUL-terminated string
+ *
+ * This is a replacement for strncpy() uses where the destination is not
+ * a NUL-terminated string, but with bounds checking on the source size, and
+ * without trailing padding. If padding is required, use strtomem_pad().
+ *
+ * Note that the size of @dest is not an argument, as the length of @dest
+ * must be discoverable by the compiler.
+ */
+#define strtomem(dest, src) do { \
+ const size_t _dest_len = __builtin_object_size(dest, 1); \
+ \
+ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
+ _dest_len == (size_t)-1); \
+ memcpy(dest, src, min(_dest_len, strnlen(src, _dest_len))); \
+} while (0)
+
+/**
* memset_after - Set a value after a struct member to the end of a struct
*
* @obj: Address of target struct instance
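Both helpers require the destination's size to be known to the compiler, so they only work on fixed-size arrays; a bare pointer makes __builtin_object_size() return -1 and trips the BUILD_BUG_ON. A hedged usage sketch with an illustrative struct:

#include <linux/string.h>
#include <linux/types.h>

struct example_fw_header {
	u8 version;
	char board_name[16] __nonstring;	/* not NUL-terminated on the wire */
};

static void example_fill_header(struct example_fw_header *hdr, const char *name)
{
	hdr->version = 2;
	/* Remaining bytes of board_name are filled with spaces. */
	strtomem_pad(hdr->board_name, name, ' ');
}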
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 4d72258d42fd..8530c7328269 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -21,6 +21,8 @@ enum string_size_units {
void string_get_size(u64 size, u64 blk_size, enum string_size_units units,
char *buf, int len);
+int parse_int_array_user(const char __user *from, size_t count, int **array);
+
#define UNESCAPE_SPACE BIT(0)
#define UNESCAPE_OCTAL BIT(1)
#define UNESCAPE_HEX BIT(2)
@@ -126,4 +128,9 @@ static inline const char *str_enabled_disabled(bool v)
return v ? "enabled" : "disabled";
}
+static inline const char *str_read_write(bool v)
+{
+ return v ? "read" : "write";
+}
+
#endif
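A hedged sketch of the new str_read_write() helper in a diagnostic message; like its siblings above it simply saves open-coding the ternary:

#include <linux/string_helpers.h>
#include <linux/printk.h>

static void example_log_transfer(bool is_read, size_t len)
{
	pr_debug("%s of %zu bytes\n", str_read_write(is_read), len);
}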
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 75eea5ebb179..770ef2cb5775 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -246,6 +246,7 @@ void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *);
bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
const struct sockaddr *sap);
void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt);
+void rpc_clnt_disconnect(struct rpc_clnt *clnt);
void rpc_cleanup_clids(void);
static inline int rpc_reply_expected(struct rpc_task *task)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index acc62647317c..b8ca3ecaf8d7 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -209,11 +209,17 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
void rpc_put_task(struct rpc_task *);
void rpc_put_task_async(struct rpc_task *);
+bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
+void rpc_task_try_cancel(struct rpc_task *task, int error);
void rpc_signal_task(struct rpc_task *);
void rpc_exit_task(struct rpc_task *);
void rpc_exit(struct rpc_task *, int);
void rpc_release_calldata(const struct rpc_call_ops *, void *);
void rpc_killall_tasks(struct rpc_clnt *);
+unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error,
+ bool (*fnmatch)(const struct rpc_task *,
+ const void *),
+ const void *data);
void rpc_execute(struct rpc_task *);
void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
@@ -252,7 +258,7 @@ int rpc_malloc(struct rpc_task *);
void rpc_free(struct rpc_task *);
int rpciod_up(void);
void rpciod_down(void);
-int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *);
+int rpc_wait_for_completion_task(struct rpc_task *task);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct net;
void rpc_show_tasks(struct net *);
@@ -264,11 +270,6 @@ extern struct workqueue_struct *xprtiod_workqueue;
void rpc_prepare_task(struct rpc_task *task);
gfp_t rpc_task_gfp_mask(void);
-static inline int rpc_wait_for_completion_task(struct rpc_task *task)
-{
- return __rpc_wait_for_completion_task(task, NULL);
-}
-
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
static inline const char * rpc_qname(const struct rpc_wait_queue *q)
{
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index daecb009c05b..ed722dd33b46 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -220,13 +220,6 @@ static inline __be32 svc_getu32(struct kvec *iov)
return val;
}
-static inline void svc_ungetu32(struct kvec *iov)
-{
- __be32 *vp = (__be32 *)iov->iov_base;
- iov->iov_base = (void *)(vp - 1);
- iov->iov_len += sizeof(*vp);
-}
-
static inline void svc_putu32(struct kvec *iov, __be32 val)
{
__be32 *vp = iov->iov_base + iov->iov_len;
@@ -311,7 +304,6 @@ struct svc_rqst {
struct auth_domain * rq_gssclient; /* "gss/"-style peer info */
struct svc_cacherep * rq_cacherep; /* cache info */
struct task_struct *rq_task; /* service thread */
- spinlock_t rq_lock; /* per-request lock */
struct net *rq_bc_net; /* pointer to backchannel's
* net namespace
*/
@@ -472,6 +464,7 @@ struct svc_procedure {
/* XDR free result: */
void (*pc_release)(struct svc_rqst *);
unsigned int pc_argsize; /* argument struct size */
+ unsigned int pc_argzero; /* how much of argument to clear */
unsigned int pc_ressize; /* result struct size */
unsigned int pc_cachetype; /* cache info (NFS) */
unsigned int pc_xdrressize; /* maximum size of XDR reply */
@@ -544,16 +537,27 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
}
/**
- * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding
+ * svcxdr_init_decode - Prepare an xdr_stream for Call decoding
* @rqstp: controlling server RPC transaction context
*
+ * This function currently assumes the RPC header in rq_arg has
+ * already been decoded. Upon return, xdr->p points to the
+ * location of the upper layer header.
*/
static inline void svcxdr_init_decode(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
- struct kvec *argv = rqstp->rq_arg.head;
+ struct xdr_buf *buf = &rqstp->rq_arg;
+ struct kvec *argv = buf->head;
+
+ /*
+ * svc_getnl() and friends do not keep the xdr_buf's ::len
+ * field up to date. Refresh that field before initializing
+ * the argument decoding stream.
+ */
+ buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len;
- xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL);
+ xdr_init_decode(xdr, buf, argv->iov_base, NULL);
xdr_set_scratch_page(xdr, rqstp->rq_scratch_page);
}
@@ -576,7 +580,7 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
buf->len = resv->iov_len;
xdr->page_ptr = buf->pages - 1;
- buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages);
+ buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages);
buf->buflen -= rqstp->rq_auth_slack;
xdr->rqst = NULL;
}
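The new pc_argzero field lets a program table clear only as much of the argument buffer as the procedure actually needs zeroed before decode, rather than the whole pc_argsize. A hedged sketch of a procedure entry (types are illustrative and unrelated fields such as pc_func are omitted):

#include <linux/sunrpc/svc.h>

struct example_args { u32 fh_len; char fh_data[64]; };
struct example_res  { __be32 status; };

static const struct svc_procedure example_proc = {
	.pc_argsize	= sizeof(struct example_args),
	.pc_argzero	= sizeof(struct example_args),	/* clear it all */
	.pc_ressize	= sizeof(struct example_res),
	.pc_xdrressize	= 1,
};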
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 69175029abbb..f84e2a1358e1 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -240,6 +240,8 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
__be32 *p, struct rpc_rqst *rqst);
+extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
+ struct page **pages, struct rpc_rqst *rqst);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
size_t nbytes);
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 70f2921e2e70..cfe19a028918 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -75,7 +75,7 @@ extern struct suspend_stats suspend_stats;
static inline void dpm_save_failed_dev(const char *name)
{
- strlcpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev],
+ strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev],
name,
sizeof(suspend_stats.failed_devs[0]));
suspend_stats.last_failed_dev++;
@@ -191,6 +191,7 @@ struct platform_s2idle_ops {
int (*begin)(void);
int (*prepare)(void);
int (*prepare_late)(void);
+ void (*check)(void);
bool (*wake)(void);
void (*restore_early)(void);
void (*restore)(void);
@@ -510,8 +511,8 @@ extern bool pm_save_wakeup_count(unsigned int count);
extern void pm_wakep_autosleep_enabled(bool set);
extern void pm_print_active_wakeup_sources(void);
-extern void lock_system_sleep(void);
-extern void unlock_system_sleep(void);
+extern unsigned int lock_system_sleep(void);
+extern void unlock_system_sleep(unsigned int);
#else /* !CONFIG_PM_SLEEP */
@@ -534,8 +535,8 @@ static inline void pm_system_wakeup(void) {}
static inline void pm_wakeup_clear(bool reset) {}
static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
-static inline void lock_system_sleep(void) {}
-static inline void unlock_system_sleep(void) {}
+static inline unsigned int lock_system_sleep(void) { return 0; }
+static inline void unlock_system_sleep(unsigned int flags) {}
#endif /* !CONFIG_PM_SLEEP */
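lock_system_sleep() now hands back a cookie that the matching unlock must receive, so converted callers follow this hedged sketch:

#include <linux/suspend.h>

static void example_with_sleep_blocked(void)
{
	unsigned int sleep_flags;

	sleep_flags = lock_system_sleep();
	/* ... work that must not race with system suspend ... */
	unlock_system_sleep(sleep_flags);
}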
diff --git a/include/linux/swab.h b/include/linux/swab.h
index bcff5149861a..9b804dbb0d79 100644
--- a/include/linux/swab.h
+++ b/include/linux/swab.h
@@ -20,4 +20,29 @@
# define swab64s __swab64s
# define swahw32s __swahw32s
# define swahb32s __swahb32s
+
+static inline void swab16_array(u16 *buf, unsigned int words)
+{
+ while (words--) {
+ swab16s(buf);
+ buf++;
+ }
+}
+
+static inline void swab32_array(u32 *buf, unsigned int words)
+{
+ while (words--) {
+ swab32s(buf);
+ buf++;
+ }
+}
+
+static inline void swab64_array(u64 *buf, unsigned int words)
+{
+ while (words--) {
+ swab64s(buf);
+ buf++;
+ }
+}
+
#endif /* _LINUX_SWAB_H */
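A hedged sketch of the new array helpers: swapping a block of 32-bit words read from a big-endian device in place on a little-endian host (the endianness check is illustrative; many drivers would go through the be32_to_cpu family instead):

#include <linux/swab.h>
#include <linux/kconfig.h>

static void example_fixup_regs(u32 *regs, unsigned int nregs)
{
	if (!IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		swab32_array(regs, nregs);
}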
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 43150b9bbc5c..2787b84eaf12 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -55,22 +55,14 @@ static inline int current_is_kswapd(void)
* actions on faults.
*/
-#define SWP_SWAPIN_ERROR_NUM 1
-#define SWP_SWAPIN_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
- SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
- SWP_PTE_MARKER_NUM)
/*
- * PTE markers are used to persist information onto PTEs that are mapped with
- * file-backed memories. As its name "PTE" hints, it should only be applied to
- * the leaves of pgtables.
+ * PTE markers are used to persist information onto PTEs that otherwise
+ * should be a none pte. As its name "PTE" hints, it should only be
+ * applied to the leaves of pgtables.
*/
-#ifdef CONFIG_PTE_MARKER
#define SWP_PTE_MARKER_NUM 1
#define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
-#else
-#define SWP_PTE_MARKER_NUM 0
-#endif
/*
* Unaddressable device memory support. See include/linux/hmm.h and
@@ -125,7 +117,7 @@ static inline int current_is_kswapd(void)
#define MAX_SWAPFILES \
((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
- SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM)
+ SWP_PTE_MARKER_NUM)
/*
* Magic header for a swap area. The first part of the union is
@@ -162,6 +154,10 @@ union swap_header {
*/
struct reclaim_state {
unsigned long reclaimed_slab;
+#ifdef CONFIG_LRU_GEN
+ /* per-thread mm walk data */
+ struct lru_gen_mm_walk *mm_walk;
+#endif
};
#ifdef __KERNEL__
@@ -351,6 +347,11 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
return entry;
}
+static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
+{
+ folio->private = (void *)entry.val;
+}
+
/* linux/mm/workingset.c */
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
@@ -375,11 +376,11 @@ extern unsigned long totalreserve_pages;
/* linux/mm/swap.c */
-extern void lru_note_cost(struct lruvec *lruvec, bool file,
- unsigned int nr_pages);
-extern void lru_note_cost_folio(struct folio *);
-extern void folio_add_lru(struct folio *);
-extern void lru_cache_add(struct page *);
+void lru_note_cost(struct lruvec *lruvec, bool file,
+ unsigned int nr_io, unsigned int nr_rotated);
+void lru_note_cost_refault(struct folio *);
+void folio_add_lru(struct folio *);
+void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
void mark_page_accessed(struct page *);
void folio_mark_accessed(struct folio *);
@@ -417,7 +418,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options);
+ unsigned int reclaim_options,
+ nodemask_t *nodemask);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
@@ -461,7 +463,7 @@ static inline unsigned long total_swapcache_pages(void)
extern void free_swap_cache(struct page *page);
extern void free_page_and_swap_cache(struct page *);
-extern void free_pages_and_swap_cache(struct page **, int);
+extern void free_pages_and_swap_cache(struct encoded_page **, int);
/* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
@@ -481,7 +483,8 @@ static inline long get_nr_swap_pages(void)
extern void si_swapinfo(struct sysinfo *);
swp_entry_t folio_alloc_swap(struct folio *folio);
-extern void put_swap_page(struct page *page, swp_entry_t entry);
+bool folio_free_swap(struct folio *folio);
+void put_swap_folio(struct folio *folio, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
@@ -500,7 +503,6 @@ extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
-extern int try_to_free_swap(struct page *);
struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
extern void exit_swap_address_space(unsigned int type);
@@ -566,7 +568,7 @@ static inline void swap_free(swp_entry_t swp)
{
}
-static inline void put_swap_page(struct page *page, swp_entry_t swp)
+static inline void put_swap_folio(struct folio *folio, swp_entry_t swp)
{
}
@@ -585,11 +587,6 @@ static inline int swp_swapcount(swp_entry_t entry)
return 0;
}
-static inline int try_to_free_swap(struct page *page)
-{
- return 0;
-}
-
static inline swp_entry_t folio_alloc_swap(struct folio *folio)
{
swp_entry_t entry;
@@ -597,6 +594,11 @@ static inline swp_entry_t folio_alloc_swap(struct folio *folio)
return entry;
}
+static inline bool folio_free_swap(struct folio *folio)
+{
+ return false;
+}
+
static inline int add_swap_extent(struct swap_info_struct *sis,
unsigned long start_page,
unsigned long nr_pages, sector_t start_block)
@@ -657,7 +659,7 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
cgroup_throttle_swaprate(&folio->page, gfp);
}
-#ifdef CONFIG_MEMCG_SWAP
+#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry);
static inline int mem_cgroup_try_charge_swap(struct folio *folio,
@@ -677,7 +679,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
}
extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
-extern bool mem_cgroup_swap_full(struct page *page);
+extern bool mem_cgroup_swap_full(struct folio *folio);
#else
static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
{
@@ -699,7 +701,7 @@ static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
return get_nr_swap_pages();
}
-static inline bool mem_cgroup_swap_full(struct page *page)
+static inline bool mem_cgroup_swap_full(struct folio *folio)
{
return vm_swap_full();
}
diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h
index a12dd1c3966c..ae73a87775b3 100644
--- a/include/linux/swap_cgroup.h
+++ b/include/linux/swap_cgroup.h
@@ -4,7 +4,7 @@
#include <linux/swap.h>
-#ifdef CONFIG_MEMCG_SWAP
+#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
unsigned short old, unsigned short new);
@@ -40,6 +40,6 @@ static inline void swap_cgroup_swapoff(int type)
return;
}
-#endif /* CONFIG_MEMCG_SWAP */
+#endif
#endif /* __LINUX_SWAP_CGROUP_H */
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 54078542134c..7ed529a77c5b 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -8,6 +8,11 @@
*/
extern struct swap_info_struct *swap_info[];
extern unsigned long generic_max_swapfile_size(void);
-extern unsigned long max_swapfile_size(void);
+unsigned long arch_max_swapfile_size(void);
+
+/* Maximum swapfile size supported for the arch (not inclusive). */
+extern unsigned long swapfile_maximum_size;
+/* Whether swap migration entry supports storing A/D bits for the arch */
+extern bool swap_migration_ad_supported;
#endif /* _LINUX_SWAPFILE_H */
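A hedged sketch of how the swap core is expected to populate these globals at boot from the per-arch hook (simplified, not the verbatim kernel code; SWP_MIG_TOTAL_BITS comes from the swapops.h changes later in this series):

#include <linux/swapfile.h>
#include <linux/swapops.h>
#include <linux/init.h>

static int __init example_swapfile_globals_init(void)
{
	swapfile_maximum_size = arch_max_swapfile_size();

	if (IS_ENABLED(CONFIG_MIGRATION) &&
	    swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))
		swap_migration_ad_supported = true;

	return 0;
}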
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index a3d435bf9f97..b982dd614572 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -8,6 +8,10 @@
#ifdef CONFIG_MMU
+#ifdef CONFIG_SWAP
+#include <linux/swapfile.h>
+#endif /* CONFIG_SWAP */
+
/*
* swapcache pages are stored in the swapper_space radix tree. We want to
* get good packing density in that tree, so the index should be dense in
@@ -23,6 +27,47 @@
#define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
#define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1)
+/*
+ * Definitions only for PFN swap entries (see is_pfn_swap_entry()). To
+ * store PFN, we only need SWP_PFN_BITS bits. Each of the pfn swap entries
+ * can use the extra bits to store other information besides PFN.
+ */
+#ifdef MAX_PHYSMEM_BITS
+#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+#else /* MAX_PHYSMEM_BITS */
+#define SWP_PFN_BITS min_t(int, \
+ sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \
+ SWP_TYPE_SHIFT)
+#endif /* MAX_PHYSMEM_BITS */
+#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1)
+
+/**
+ * Migration swap entry specific bitfield definitions. Layout:
+ *
+ * |----------+--------------------|
+ * | swp_type | swp_offset |
+ * |----------+--------+-+-+-------|
+ * | | resv |D|A| PFN |
+ * |----------+--------+-+-+-------|
+ *
+ * @SWP_MIG_YOUNG_BIT: Whether the page used to have young bit set (bit A)
+ * @SWP_MIG_DIRTY_BIT: Whether the page used to have dirty bit set (bit D)
+ *
+ * Note: A/D bits will be stored in migration entries iff there are enough
+ * free bits in the arch-specific swp offset. By default we'll ignore A/D bits
+ * when migrating a page. Please refer to migration_entry_supports_ad()
+ * for more information. If there are more bits besides the PFN and A/D bits,
+ * they should be reserved and always be zero.
+ */
+#define SWP_MIG_YOUNG_BIT (SWP_PFN_BITS)
+#define SWP_MIG_DIRTY_BIT (SWP_PFN_BITS + 1)
+#define SWP_MIG_TOTAL_BITS (SWP_PFN_BITS + 2)
+
+#define SWP_MIG_YOUNG BIT(SWP_MIG_YOUNG_BIT)
+#define SWP_MIG_DIRTY BIT(SWP_MIG_DIRTY_BIT)
+
+static inline bool is_pfn_swap_entry(swp_entry_t entry);
+
/* Clear all flags but only keep swp_entry_t related information */
static inline pte_t pte_swp_clear_flags(pte_t pte)
{
@@ -64,6 +109,17 @@ static inline pgoff_t swp_offset(swp_entry_t entry)
return entry.val & SWP_OFFSET_MASK;
}
+/*
+ * This should only be called upon a pfn swap entry to get the PFN stored
+ * in the swap entry. Please refer to is_pfn_swap_entry() for the definition
+ * of pfn swap entry.
+ */
+static inline unsigned long swp_offset_pfn(swp_entry_t entry)
+{
+ VM_BUG_ON(!is_pfn_swap_entry(entry));
+ return swp_offset(entry) & SWP_PFN_MASK;
+}
+
/* check whether a pte points to a swap entry */
static inline int is_swap_pte(pte_t pte)
{
@@ -108,16 +164,6 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
return xa_mk_value(entry.val);
}
-static inline swp_entry_t make_swapin_error_entry(struct page *page)
-{
- return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page));
-}
-
-static inline int is_swapin_error_entry(swp_entry_t entry)
-{
- return swp_type(entry) == SWP_SWAPIN_ERROR;
-}
-
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
{
@@ -240,6 +286,52 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
return swp_entry(SWP_MIGRATION_WRITE, offset);
}
+/*
+ * Returns whether the host has large enough swap offset field to support
+ * carrying over pgtable A/D bits for page migrations. The result is
+ * pretty much arch specific.
+ */
+static inline bool migration_entry_supports_ad(void)
+{
+#ifdef CONFIG_SWAP
+ return swap_migration_ad_supported;
+#else /* CONFIG_SWAP */
+ return false;
+#endif /* CONFIG_SWAP */
+}
+
+static inline swp_entry_t make_migration_entry_young(swp_entry_t entry)
+{
+ if (migration_entry_supports_ad())
+ return swp_entry(swp_type(entry),
+ swp_offset(entry) | SWP_MIG_YOUNG);
+ return entry;
+}
+
+static inline bool is_migration_entry_young(swp_entry_t entry)
+{
+ if (migration_entry_supports_ad())
+ return swp_offset(entry) & SWP_MIG_YOUNG;
+ /* Keep the old behavior of aging page after migration */
+ return false;
+}
+
+static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry)
+{
+ if (migration_entry_supports_ad())
+ return swp_entry(swp_type(entry),
+ swp_offset(entry) | SWP_MIG_DIRTY);
+ return entry;
+}
+
+static inline bool is_migration_entry_dirty(swp_entry_t entry)
+{
+ if (migration_entry_supports_ad())
+ return swp_offset(entry) & SWP_MIG_DIRTY;
+ /* Keep the old behavior of clean page after migration */
+ return false;
+}
+
extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
spinlock_t *ptl);
extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
@@ -247,8 +339,8 @@ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
#ifdef CONFIG_HUGETLB_PAGE
extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl);
extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte);
-#endif
-#else
+#endif /* CONFIG_HUGETLB_PAGE */
+#else /* CONFIG_MIGRATION */
static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
return swp_entry(0, 0);
@@ -276,7 +368,7 @@ static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
#ifdef CONFIG_HUGETLB_PAGE
static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { }
static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { }
-#endif
+#endif /* CONFIG_HUGETLB_PAGE */
static inline int is_writable_migration_entry(swp_entry_t entry)
{
return 0;
@@ -286,66 +378,69 @@ static inline int is_readable_migration_entry(swp_entry_t entry)
return 0;
}
-#endif
-
-typedef unsigned long pte_marker;
-
-#define PTE_MARKER_UFFD_WP BIT(0)
-#define PTE_MARKER_MASK (PTE_MARKER_UFFD_WP)
-
-#ifdef CONFIG_PTE_MARKER
-
-static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+static inline swp_entry_t make_migration_entry_young(swp_entry_t entry)
{
- return swp_entry(SWP_PTE_MARKER, marker);
+ return entry;
}
-static inline bool is_pte_marker_entry(swp_entry_t entry)
+static inline bool is_migration_entry_young(swp_entry_t entry)
{
- return swp_type(entry) == SWP_PTE_MARKER;
+ return false;
}
-static inline pte_marker pte_marker_get(swp_entry_t entry)
+static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry)
{
- return swp_offset(entry) & PTE_MARKER_MASK;
+ return entry;
}
-static inline bool is_pte_marker(pte_t pte)
+static inline bool is_migration_entry_dirty(swp_entry_t entry)
{
- return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
+ return false;
}
+#endif /* CONFIG_MIGRATION */
+
+typedef unsigned long pte_marker;
-#else /* CONFIG_PTE_MARKER */
+#define PTE_MARKER_UFFD_WP BIT(0)
+#define PTE_MARKER_SWAPIN_ERROR BIT(1)
+#define PTE_MARKER_MASK (BIT(2) - 1)
static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
{
- /* This should never be called if !CONFIG_PTE_MARKER */
- WARN_ON_ONCE(1);
- return swp_entry(0, 0);
+ return swp_entry(SWP_PTE_MARKER, marker);
}
static inline bool is_pte_marker_entry(swp_entry_t entry)
{
- return false;
+ return swp_type(entry) == SWP_PTE_MARKER;
}
static inline pte_marker pte_marker_get(swp_entry_t entry)
{
- return 0;
+ return swp_offset(entry) & PTE_MARKER_MASK;
}
static inline bool is_pte_marker(pte_t pte)
{
- return false;
+ return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
}
-#endif /* CONFIG_PTE_MARKER */
-
static inline pte_t make_pte_marker(pte_marker marker)
{
return swp_entry_to_pte(make_pte_marker_entry(marker));
}
+static inline swp_entry_t make_swapin_error_entry(void)
+{
+ return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR);
+}
+
+static inline int is_swapin_error_entry(swp_entry_t entry)
+{
+ return is_pte_marker_entry(entry) &&
+ (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR);
+}
+
/*
* This is a special version to check pte_none() just to cover the case when
* the pte is a pte marker. It existed because in many cases the pte marker
@@ -358,9 +453,6 @@ static inline pte_t make_pte_marker(pte_marker marker)
* memory, kernel-only memory (including when the system is during-boot),
* non-ram based generic file-system. It's fine to be used even there, but the
* extra pte marker check will be pure overhead.
- *
- * For systems configured with !CONFIG_PTE_MARKER this will be automatically
- * optimized to pte_none().
*/
static inline int pte_none_mostly(pte_t pte)
{
@@ -369,7 +461,7 @@ static inline int pte_none_mostly(pte_t pte)
static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
{
- struct page *p = pfn_to_page(swp_offset(entry));
+ struct page *p = pfn_to_page(swp_offset_pfn(entry));
/*
* Any use of migration entries may only occur while the
@@ -387,6 +479,9 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
*/
static inline bool is_pfn_swap_entry(swp_entry_t entry)
{
+ /* Make sure the swp offset can always store the needed fields */
+ BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
+
return is_migration_entry(entry) || is_device_private_entry(entry) ||
is_device_exclusive_entry(entry);
}
@@ -426,7 +521,7 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
{
return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
}
-#else
+#else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
struct page *page)
{
@@ -455,12 +550,10 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
{
return 0;
}
-#endif
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
#ifdef CONFIG_MEMORY_FAILURE
-extern atomic_long_t num_poisoned_pages __read_mostly;
-
/*
* Support for hardware poisoned pages
*/
@@ -475,26 +568,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
}
-static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
-{
- return swp_offset(entry);
-}
-
-static inline void num_poisoned_pages_inc(void)
-{
- atomic_long_inc(&num_poisoned_pages);
-}
-
-static inline void num_poisoned_pages_dec(void)
-{
- atomic_long_dec(&num_poisoned_pages);
-}
-
-static inline void num_poisoned_pages_sub(long i)
-{
- atomic_long_sub(i, &num_poisoned_pages);
-}
-
#else
static inline swp_entry_t make_hwpoison_entry(struct page *page)
@@ -506,14 +579,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
{
return 0;
}
-
-static inline void num_poisoned_pages_inc(void)
-{
-}
-
-static inline void num_poisoned_pages_sub(long i)
-{
-}
#endif
static inline int non_swap_entry(swp_entry_t entry)
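A hedged sketch of the intended use of the young/dirty migration helpers: carry a PTE's accessed and dirty bits across migration and restore them when the migration entry is torn down (simplified from what try_to_migrate_one() and remove_migration_pte() do; not the verbatim kernel code):

#include <linux/swapops.h>
#include <linux/pgtable.h>

static swp_entry_t example_make_migration_entry(pte_t pteval, pgoff_t pfn)
{
	swp_entry_t entry = make_readable_migration_entry(pfn);

	if (pte_young(pteval))
		entry = make_migration_entry_young(entry);
	if (pte_dirty(pteval))
		entry = make_migration_entry_dirty(entry);
	return entry;
}

static pte_t example_restore_pte(swp_entry_t entry, pte_t pte)
{
	if (is_migration_entry_young(entry))
		pte = pte_mkyoung(pte);
	if (is_migration_entry_dirty(entry))
		pte = pte_mkdirty(pte);
	return pte;
}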
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a34b0f9a9972..33a0ee3bcb2e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -264,6 +264,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
#ifdef CONFIG_COMPAT
+#define SYSCALL32_DEFINE0 COMPAT_SYSCALL_DEFINE0
#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
@@ -271,6 +272,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
#else
+#define SYSCALL32_DEFINE0 SYSCALL_DEFINE0
#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
diff --git a/include/linux/syslog.h b/include/linux/syslog.h
index 86af908e2663..955f80e34d4f 100644
--- a/include/linux/syslog.h
+++ b/include/linux/syslog.h
@@ -8,6 +8,8 @@
#ifndef _LINUX_SYSLOG_H
#define _LINUX_SYSLOG_H
+#include <linux/wait.h>
+
/* Close the log. Currently a NOP. */
#define SYSLOG_ACTION_CLOSE 0
/* Open the log. Currently a NOP. */
@@ -35,5 +37,6 @@
#define SYSLOG_FROM_PROC 1
int do_syslog(int type, char __user *buf, int count, int source);
+extern wait_queue_head_t log_wait;
#endif /* _LINUX_SYSLOG_H */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index a9fbe22732c3..ca7f05a130d2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -295,7 +295,7 @@ struct tcp_sock {
u32 packets_out; /* Packets which are "in flight" */
u32 retrans_out; /* Retransmitted packets out */
u32 max_packets_out; /* max packets_out in last window */
- u32 max_packets_seq; /* right edge of max_packets_out flight */
+ u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */
u16 urg_data; /* Saved octet of OOB data and control flags */
u8 ecn_flags; /* ECN status bits. */
@@ -388,6 +388,12 @@ struct tcp_sock {
u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs
* values defined in uapi/linux/tcp.h
*/
+ u8 bpf_chg_cc_inprogress:1; /* In the middle of
+ * bpf_setsockopt(TCP_CONGESTION);
+ * set to keep bpf_tcp_cc->init()
+ * from recursing by calling
+ * bpf_setsockopt(TCP_CONGESTION, "itself").
+ */
#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
#else
#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
@@ -417,6 +423,7 @@ struct tcp_sock {
u32 probe_seq_start;
u32 probe_seq_end;
} mtu_probe;
+ u32 plb_rehash; /* PLB-triggered rehash attempts */
u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
* while socket was owned by user.
*/
diff --git a/include/linux/termios_internal.h b/include/linux/termios_internal.h
new file mode 100644
index 000000000000..d77f29e5e2b7
--- /dev/null
+++ b/include/linux/termios_internal.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_TERMIOS_CONV_H
+#define _LINUX_TERMIOS_CONV_H
+
+#include <linux/uaccess.h>
+#include <asm/termios.h>
+
+/* intr=^C quit=^\ erase=del kill=^U
+ eof=^D vtime=\0 vmin=\1 sxtc=\0
+ start=^Q stop=^S susp=^Z eol=\0
+ reprint=^R discard=^O werase=^W lnext=^V
+ eol2=\0
+*/
+
+#ifdef VDSUSP
+#define INIT_C_CC_VDSUSP_EXTRA [VDSUSP] = 'Y'-0x40,
+#else
+#define INIT_C_CC_VDSUSP_EXTRA
+#endif
+
+#define INIT_C_CC { \
+ [VINTR] = 'C'-0x40, \
+ [VQUIT] = '\\'-0x40, \
+ [VERASE] = '\177', \
+ [VKILL] = 'U'-0x40, \
+ [VEOF] = 'D'-0x40, \
+ [VSTART] = 'Q'-0x40, \
+ [VSTOP] = 'S'-0x40, \
+ [VSUSP] = 'Z'-0x40, \
+ [VREPRINT] = 'R'-0x40, \
+ [VDISCARD] = 'O'-0x40, \
+ [VWERASE] = 'W'-0x40, \
+ [VLNEXT] = 'V'-0x40, \
+ INIT_C_CC_VDSUSP_EXTRA \
+ [VMIN] = 1 }
+
+int user_termio_to_kernel_termios(struct ktermios *, struct termio __user *);
+int kernel_termios_to_user_termio(struct termio __user *, struct ktermios *);
+#ifdef TCGETS2
+int user_termios_to_kernel_termios(struct ktermios *, struct termios2 __user *);
+int kernel_termios_to_user_termios(struct termios2 __user *, struct ktermios *);
+int user_termios_to_kernel_termios_1(struct ktermios *, struct termios __user *);
+int kernel_termios_to_user_termios_1(struct termios __user *, struct ktermios *);
+#else /* TCGETS2 */
+int user_termios_to_kernel_termios(struct ktermios *, struct termios __user *);
+int kernel_termios_to_user_termios(struct termios __user *, struct ktermios *);
+#endif /* TCGETS2 */
+
+#endif /* _LINUX_TERMIOS_CONV_H */
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 1386c713885d..5e093602e8fc 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -17,8 +17,6 @@
#include <linux/workqueue.h>
#include <uapi/linux/thermal.h>
-#define THERMAL_MAX_TRIPS 12
-
/* invalid cooling state */
#define THERMAL_CSTATE_INVALID -1UL
@@ -102,6 +100,7 @@ struct thermal_cooling_device_ops {
struct thermal_cooling_device {
int id;
char *type;
+ unsigned long max_state;
struct device device;
struct device_node *np;
void *devdata;
@@ -296,82 +295,43 @@ struct thermal_zone_params {
int offset;
};
-/**
- * struct thermal_zone_of_device_ops - callbacks for handling DT based zones
- *
- * Mandatory:
- * @get_temp: a pointer to a function that reads the sensor temperature.
- *
- * Optional:
- * @get_trend: a pointer to a function that reads the sensor temperature trend.
- * @set_trips: a pointer to a function that sets a temperature window. When
- * this window is left the driver must inform the thermal core via
- * thermal_zone_device_update.
- * @set_emul_temp: a pointer to a function that sets sensor emulated
- * temperature.
- * @set_trip_temp: a pointer to a function that sets the trip temperature on
- * hardware.
- * @change_mode: a pointer to a function that notifies the thermal zone
- * mode change.
- */
-struct thermal_zone_of_device_ops {
- int (*get_temp)(void *, int *);
- int (*get_trend)(void *, int, enum thermal_trend *);
- int (*set_trips)(void *, int, int);
- int (*set_emul_temp)(void *, int);
- int (*set_trip_temp)(void *, int, int);
- int (*change_mode) (void *, enum thermal_device_mode);
-};
-
/* Function declarations */
#ifdef CONFIG_THERMAL_OF
-int thermal_zone_of_get_sensor_id(struct device_node *tz_np,
- struct device_node *sensor_np,
- u32 *id);
-struct thermal_zone_device *
-thermal_zone_of_sensor_register(struct device *dev, int id, void *data,
- const struct thermal_zone_of_device_ops *ops);
-void thermal_zone_of_sensor_unregister(struct device *dev,
- struct thermal_zone_device *tz);
-struct thermal_zone_device *devm_thermal_zone_of_sensor_register(
- struct device *dev, int id, void *data,
- const struct thermal_zone_of_device_ops *ops);
-void devm_thermal_zone_of_sensor_unregister(struct device *dev,
- struct thermal_zone_device *tz);
-#else
+struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
+ const struct thermal_zone_device_ops *ops);
-static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np,
- struct device_node *sensor_np,
- u32 *id)
-{
- return -ENOENT;
-}
-static inline struct thermal_zone_device *
-thermal_zone_of_sensor_register(struct device *dev, int id, void *data,
- const struct thermal_zone_of_device_ops *ops)
+struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
+ const struct thermal_zone_device_ops *ops);
+
+void thermal_of_zone_unregister(struct thermal_zone_device *tz);
+
+void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz);
+
+void thermal_of_zone_unregister(struct thermal_zone_device *tz);
+
+#else
+static inline
+struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
+ const struct thermal_zone_device_ops *ops)
{
- return ERR_PTR(-ENODEV);
+ return ERR_PTR(-ENOTSUPP);
}
static inline
-void thermal_zone_of_sensor_unregister(struct device *dev,
- struct thermal_zone_device *tz)
+struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
+ const struct thermal_zone_device_ops *ops)
{
+ return ERR_PTR(-ENOTSUPP);
}
-static inline struct thermal_zone_device *devm_thermal_zone_of_sensor_register(
- struct device *dev, int id, void *data,
- const struct thermal_zone_of_device_ops *ops)
+static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz)
{
- return ERR_PTR(-ENODEV);
}
-static inline
-void devm_thermal_zone_of_sensor_unregister(struct device *dev,
- struct thermal_zone_device *tz)
+static inline void devm_thermal_of_zone_unregister(struct device *dev,
+ struct thermal_zone_device *tz)
{
}
-
#endif
#ifdef CONFIG_THERMAL
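A hedged sketch of a sensor driver on the new OF registration API: the driver now supplies an ordinary thermal_zone_device_ops and registers against its own sensor id (driver and ops names are illustrative):

#include <linux/thermal.h>
#include <linux/platform_device.h>
#include <linux/err.h>

static int example_get_temp(struct thermal_zone_device *tz, int *temp)
{
	*temp = 42000;		/* millidegrees Celsius, illustrative */
	return 0;
}

static const struct thermal_zone_device_ops example_tz_ops = {
	.get_temp = example_get_temp,
};

static int example_probe(struct platform_device *pdev)
{
	struct thermal_zone_device *tzd;

	tzd = devm_thermal_of_zone_register(&pdev->dev, 0, NULL, &example_tz_ops);
	if (IS_ERR(tzd))
		return PTR_ERR(tzd);
	return 0;
}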
diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h
index 9f442d73f3df..90cd08ab2f5d 100644
--- a/include/linux/thunderbolt.h
+++ b/include/linux/thunderbolt.h
@@ -187,6 +187,7 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir);
* @device_name: Name of the device (or %NULL if not known)
* @link_speed: Speed of the link in Gb/s
* @link_width: Width of the link (1 or 2)
+ * @link_usb4: Downstream link is USB4
* @is_unplugged: The XDomain is unplugged
* @needs_uuid: If the XDomain does not have @remote_uuid it will be
* queried first
@@ -234,6 +235,7 @@ struct tb_xdomain {
const char *device_name;
unsigned int link_speed;
unsigned int link_width;
+ bool link_usb4;
bool is_unplugged;
bool needs_uuid;
struct ida service_ids;
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 3146f1c056c9..bb9d3f5542f8 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -45,6 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags,
void free_time_ns(struct time_namespace *ns);
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
struct vdso_data *arch_get_vdso_data(void *vvar_page);
+struct page *find_timens_vvar_page(struct vm_area_struct *vma);
static inline void put_time_ns(struct time_namespace *ns)
{
@@ -141,6 +142,11 @@ static inline void timens_on_fork(struct nsproxy *nsproxy,
return;
}
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+
static inline void timens_add_monotonic(struct timespec64 *ts) { }
static inline void timens_add_boottime(struct timespec64 *ts) { }
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 648f00105f58..9162f275819a 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -169,7 +169,6 @@ static inline int timer_pending(const struct timer_list * timer)
}
extern void add_timer_on(struct timer_list *timer, int cpu);
-extern int del_timer(struct timer_list * timer);
extern int mod_timer(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
extern int timer_reduce(struct timer_list *timer, unsigned long expires);
@@ -183,14 +182,36 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires);
extern void add_timer(struct timer_list *timer);
extern int try_to_del_timer_sync(struct timer_list *timer);
+extern int timer_delete_sync(struct timer_list *timer);
+extern int timer_delete(struct timer_list *timer);
+extern int timer_shutdown_sync(struct timer_list *timer);
+extern int timer_shutdown(struct timer_list *timer);
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
- extern int del_timer_sync(struct timer_list *timer);
-#else
-# define del_timer_sync(t) del_timer(t)
-#endif
+/**
+ * del_timer_sync - Delete a pending timer and wait for a running callback
+ * @timer: The timer to be deleted
+ *
+ * See timer_delete_sync() for detailed explanation.
+ *
+ * Do not use in new code. Use timer_delete_sync() instead.
+ */
+static inline int del_timer_sync(struct timer_list *timer)
+{
+ return timer_delete_sync(timer);
+}
-#define del_singleshot_timer_sync(t) del_timer_sync(t)
+/**
+ * del_timer - Delete a pending timer
+ * @timer: The timer to be deleted
+ *
+ * See timer_delete() for detailed explanation.
+ *
+ * Do not use in new code. Use timer_delete() instead.
+ */
+static inline int del_timer(struct timer_list *timer)
+{
+ return timer_delete(timer);
+}
extern void init_timers(void);
struct hrtimer;
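A hedged sketch of driver teardown on the renamed API; timer_shutdown_sync() both waits for a running callback and prevents the timer from being re-armed afterwards, which del_timer_sync() never guaranteed:

#include <linux/timer.h>

struct example_dev {
	struct timer_list poll_timer;
};

static void example_remove(struct example_dev *dev)
{
	/* Waits for a running callback and forbids future re-arming. */
	timer_shutdown_sync(&dev->poll_timer);
}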
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index 93884086f392..adc80e29168e 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -35,7 +35,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
{
struct rb_node *leftmost = rb_first_cached(&head->rb_root);
- return rb_entry(leftmost, struct timerqueue_node, node);
+ return rb_entry_safe(leftmost, struct timerqueue_node, node);
}
static inline void timerqueue_init(struct timerqueue_node *node)
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 498dbcedb451..1c3948a1d6ad 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -21,7 +21,12 @@ struct tnum {
struct tnum tnum_const(u64 value);
/* A completely unknown value */
extern const struct tnum tnum_unknown;
-/* A value that's unknown except that @min <= value <= @max */
+/* An unknown value that is a superset of @min <= value <= @max.
+ *
+ * Could include values outside the range of [@min, @max].
+ * For example tnum_range(0, 2) is represented by {0, 1, 2, *3*},
+ * rather than the intended set of {0, 1, 2}.
+ */
struct tnum tnum_range(u64 min, u64 max);
/* Arithmetic and logical ops */
@@ -73,7 +78,18 @@ static inline bool tnum_is_unknown(struct tnum a)
*/
bool tnum_is_aligned(struct tnum a, u64 size);
-/* Returns true if @b represents a subset of @a. */
+/* Returns true if @b represents a subset of @a.
+ *
+ * Note that using tnum_range() as @a requires extra cautions as tnum_in() may
+ * return true unexpectedly due to tnum limited ability to represent tight
+ * range, e.g.
+ *
+ * tnum_in(tnum_range(0, 2), tnum_const(3)) == true
+ *
+ * As a rule of thumb, if @a is explicitly coded rather than coming from
+ * reg->var_off, it should be in form of tnum_const(), tnum_range(0, 2**n - 1),
+ * or tnum_range(2**n, 2**(n+1) - 1).
+ */
bool tnum_in(struct tnum a, struct tnum b);
/* Formatting functions. These have snprintf-like semantics: they will write
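The caveat above falls straight out of the representation: a tnum is a (value, mask) pair and tnum_range(0, 2) needs two unknown low bits to cover 0..2, giving (value = 0, mask = 0x3), whose concrete set is {0, 1, 2, 3}. A hedged one-liner showing the surprising membership test:

#include <linux/tnum.h>

static bool example_in_0_to_2(u64 v)
{
	/* Returns true for v == 3 as well, per the note above. */
	return tnum_in(tnum_range(0, 2), tnum_const(v));
}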
diff --git a/include/linux/trace.h b/include/linux/trace.h
index bf169612ffe1..80ffda871749 100644
--- a/include/linux/trace.h
+++ b/include/linux/trace.h
@@ -2,8 +2,6 @@
#ifndef _LINUX_TRACE_H
#define _LINUX_TRACE_H
-#ifdef CONFIG_TRACING
-
#define TRACE_EXPORT_FUNCTION BIT(0)
#define TRACE_EXPORT_EVENT BIT(1)
#define TRACE_EXPORT_MARKER BIT(2)
@@ -28,11 +26,13 @@ struct trace_export {
int flags;
};
+struct trace_array;
+
+#ifdef CONFIG_TRACING
+
int register_ftrace_export(struct trace_export *export);
int unregister_ftrace_export(struct trace_export *export);
-struct trace_array;
-
void trace_printk_init_buffers(void);
__printf(3, 4)
int trace_array_printk(struct trace_array *tr, unsigned long ip,
@@ -48,6 +48,38 @@ void osnoise_arch_unregister(void);
void osnoise_trace_irq_entry(int id);
void osnoise_trace_irq_exit(int id, const char *desc);
+#else /* CONFIG_TRACING */
+static inline int register_ftrace_export(struct trace_export *export)
+{
+ return -EINVAL;
+}
+static inline int unregister_ftrace_export(struct trace_export *export)
+{
+ return 0;
+}
+static inline void trace_printk_init_buffers(void)
+{
+}
+static inline int trace_array_printk(struct trace_array *tr, unsigned long ip,
+ const char *fmt, ...)
+{
+ return 0;
+}
+static inline int trace_array_init_printk(struct trace_array *tr)
+{
+ return -EINVAL;
+}
+static inline void trace_array_put(struct trace_array *tr)
+{
+}
+static inline struct trace_array *trace_array_get_by_name(const char *name)
+{
+ return NULL;
+}
+static inline int trace_array_destroy(struct trace_array *tr)
+{
+ return 0;
+}
#endif /* CONFIG_TRACING */
#endif /* _LINUX_TRACE_H */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index b18759a673c6..20749bd9db71 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -92,6 +92,7 @@ struct trace_iterator {
unsigned int temp_size;
char *fmt; /* modified format holder */
unsigned int fmt_size;
+ long wait_index;
/* trace_seq for __print_flags() and __print_symbolic() etc. */
struct trace_seq tmp_seq;
@@ -814,8 +815,6 @@ extern int trace_add_event_call(struct trace_event_call *call);
extern int trace_remove_event_call(struct trace_event_call *call);
extern int trace_event_get_offsets(struct trace_event_call *call);
-#define is_signed_type(type) (((type)(-1)) < (type)1)
-
int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set);
int trace_set_clr_event(const char *system, const char *event, int set);
int trace_array_set_clr_event(struct trace_array *tr, const char *system,
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 7b0a5d478ef6..730c3301d710 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -122,8 +122,6 @@ struct tty_operations;
/**
* struct tty_struct - state associated with a tty while open
*
- * @magic: magic value set early in @alloc_tty_struct to %TTY_MAGIC, for
- * debugging purposes
* @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero
* frees the structure
* @dev: class device or %NULL (e.g. ptys, serdev)
@@ -193,7 +191,6 @@ struct tty_operations;
* &struct tty_port.
*/
struct tty_struct {
- int magic;
struct kref kref;
struct device *dev;
struct tty_driver *driver;
@@ -260,9 +257,6 @@ struct tty_file_private {
struct list_head list;
};
-/* tty magic number */
-#define TTY_MAGIC 0x5401
-
/**
* DOC: TTY Struct Flags
*
@@ -434,7 +428,7 @@ int tty_hung_up_p(struct file *filp);
void do_SAK(struct tty_struct *tty);
void __do_SAK(struct tty_struct *tty);
void no_tty(void);
-speed_t tty_termios_baud_rate(struct ktermios *termios);
+speed_t tty_termios_baud_rate(const struct ktermios *termios);
void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud,
speed_t obaud);
void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud,
@@ -458,7 +452,7 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty)
unsigned char tty_get_char_size(unsigned int cflag);
unsigned char tty_get_frame_size(unsigned int cflag);
-void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old);
+void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old);
int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b);
int tty_set_termios(struct tty_struct *tty, struct ktermios *kt);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 4841d8069c07..e00034118c7b 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -7,6 +7,7 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/cdev.h>
+#include <linux/uaccess.h>
#include <linux/termios.h>
#include <linux/seq_file.h>
@@ -141,7 +142,7 @@ struct serial_struct;
*
* Optional.
*
- * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)``
+ * @set_termios: ``void ()(struct tty_struct *tty, const struct ktermios *old)``
*
* This routine allows the @tty driver to be notified when device's
* termios settings have changed. New settings are in @tty->termios.
@@ -365,7 +366,7 @@ struct tty_operations {
unsigned int cmd, unsigned long arg);
long (*compat_ioctl)(struct tty_struct *tty,
unsigned int cmd, unsigned long arg);
- void (*set_termios)(struct tty_struct *tty, struct ktermios * old);
+ void (*set_termios)(struct tty_struct *tty, const struct ktermios *old);
void (*throttle)(struct tty_struct * tty);
void (*unthrottle)(struct tty_struct * tty);
void (*stop)(struct tty_struct *tty);
@@ -396,7 +397,6 @@ struct tty_operations {
/**
* struct tty_driver -- driver for TTY devices
*
- * @magic: set to %TTY_DRIVER_MAGIC in __tty_alloc_driver()
* @kref: reference counting. Reaching zero frees all the internals and the
* driver.
* @cdevs: allocated/registered character /dev devices
@@ -432,7 +432,6 @@ struct tty_operations {
* @driver_name, @name, @type, @subtype, @init_termios, and @ops.
*/
struct tty_driver {
- int magic;
struct kref kref;
struct cdev **cdevs;
struct module *owner;
@@ -489,9 +488,6 @@ static inline void tty_set_operations(struct tty_driver *driver,
driver->ops = op;
}
-/* tty driver magic number */
-#define TTY_DRIVER_MAGIC 0x5402
-
/**
* DOC: TTY Driver Flags
*
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index ede6f2157f32..dcb61ec11424 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -130,7 +130,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* a pointer to wordsize-sensitive structure belongs here, but most of
* ldiscs will happily leave it %NULL.
*
- * @set_termios: [TTY] ``void ()(struct tty_struct *tty, struct ktermios *old)``
+ * @set_termios: [TTY] ``void ()(struct tty_struct *tty, const struct ktermios *old)``
*
* This function notifies the line discpline that a change has been made
* to the termios structure.
@@ -227,7 +227,7 @@ struct tty_ldisc_ops {
unsigned long arg);
int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd,
unsigned long arg);
- void (*set_termios)(struct tty_struct *tty, struct ktermios *old);
+ void (*set_termios)(struct tty_struct *tty, const struct ktermios *old);
__poll_t (*poll)(struct tty_struct *tty, struct file *file,
struct poll_table_struct *wait);
void (*hangup)(struct tty_struct *tty);
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index 6ad4e9032d53..46040d66334a 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -8,7 +8,7 @@
*
* Key points :
*
- * - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP.
+ * - Use a seqcount on 32-bit
* - The whole thing is a no-op on 64-bit architectures.
*
* Usage constraints:
@@ -20,7 +20,8 @@
* writer and also spin forever.
*
* 3) Write side must use the _irqsave() variant if other writers, or a reader,
- * can be invoked from an IRQ context.
+ * can be invoked from an IRQ context. On 64bit systems this variant does not
+ * disable interrupts.
*
* 4) If reader fetches several counters, there is no guarantee the whole values
* are consistent w.r.t. each other (remember point #2: seqcounts are not
@@ -29,11 +30,6 @@
* 5) Readers are allowed to sleep or be preempted/interrupted: they perform
* pure reads.
*
- * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats
- * might be updated from a hardirq or softirq context (remember point #1:
- * seqcounts are not used for UP kernels). 32-bit UP stat readers could read
- * corrupted 64-bit values otherwise.
- *
* Usage :
*
* Stats producer (writer) should use following template granted it already got
@@ -66,7 +62,7 @@
#include <linux/seqlock.h>
struct u64_stats_sync {
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+#if BITS_PER_LONG == 32
seqcount_t seq;
#endif
};
@@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p)
local64_inc(&p->v);
}
-#else
+static inline void u64_stats_init(struct u64_stats_sync *syncp) { }
+static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { }
+static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { }
+static inline unsigned long __u64_stats_irqsave(void) { return 0; }
+static inline void __u64_stats_irqrestore(unsigned long flags) { }
+static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+ return 0;
+}
+static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+ unsigned int start)
+{
+ return false;
+}
+
+#else /* 64 bit */
typedef struct {
u64 v;
@@ -123,123 +134,95 @@ static inline void u64_stats_inc(u64_stats_t *p)
{
p->v++;
}
-#endif
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
-#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
-#else
static inline void u64_stats_init(struct u64_stats_sync *syncp)
{
+ seqcount_init(&syncp->seq);
}
-#endif
-static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_disable();
+ preempt_disable_nested();
write_seqcount_begin(&syncp->seq);
-#endif
}
-static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+static inline void __u64_stats_update_end(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
write_seqcount_end(&syncp->seq);
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_enable();
-#endif
+ preempt_enable_nested();
}
-static inline unsigned long
-u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+static inline unsigned long __u64_stats_irqsave(void)
{
- unsigned long flags = 0;
+ unsigned long flags;
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_disable();
- else
- local_irq_save(flags);
- write_seqcount_begin(&syncp->seq);
-#endif
+ local_irq_save(flags);
return flags;
}
-static inline void
-u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
- unsigned long flags)
+static inline void __u64_stats_irqrestore(unsigned long flags)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
- write_seqcount_end(&syncp->seq);
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_enable();
- else
- local_irq_restore(flags);
-#endif
+ local_irq_restore(flags);
}
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
return read_seqcount_begin(&syncp->seq);
-#else
- return 0;
-#endif
}
-static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+ unsigned int start)
{
-#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
- preempt_disable();
-#endif
- return __u64_stats_fetch_begin(syncp);
+ return read_seqcount_retry(&syncp->seq, start);
}
+#endif /* !64 bit */
-static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
- unsigned int start)
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
- return read_seqcount_retry(&syncp->seq, start);
-#else
- return false;
-#endif
+ __u64_stats_update_begin(syncp);
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+ __u64_stats_update_end(syncp);
+}
+
+static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+{
+ unsigned long flags = __u64_stats_irqsave();
+
+ __u64_stats_update_begin(syncp);
+ return flags;
+}
+
+static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
+ unsigned long flags)
+{
+ __u64_stats_update_end(syncp);
+ __u64_stats_irqrestore(flags);
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+ return __u64_stats_fetch_begin(syncp);
}
static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
- preempt_enable();
-#endif
return __u64_stats_fetch_retry(syncp, start);
}
-/*
- * In case irq handlers can update u64 counters, readers can use following helpers
- * - SMP 32bit arches use seqcount protection, irq safe.
- * - UP 32bit must disable irqs.
- * - 64bit have no problem atomically reading u64 values, irq safe.
- */
+/* Obsolete interfaces */
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
- preempt_disable();
-#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
- local_irq_disable();
-#endif
- return __u64_stats_fetch_begin(syncp);
+ return u64_stats_fetch_begin(syncp);
}
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
- preempt_enable();
-#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
- local_irq_enable();
-#endif
- return __u64_stats_fetch_retry(syncp, start);
+ return u64_stats_fetch_retry(syncp, start);
}
#endif /* _LINUX_U64_STATS_SYNC_H */
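
A minimal usage sketch of the consolidated helpers above, assuming a hypothetical
per-device stats struct (not taken from this patch): the writer brackets its
updates with update_begin/update_end, the reader loops on fetch_begin/fetch_retry.

struct my_stats {
	u64 packets;
	u64 bytes;
	struct u64_stats_sync syncp;
};

static void my_stats_add(struct my_stats *s, unsigned int len)
{
	u64_stats_update_begin(&s->syncp);	/* writer side */
	s->packets++;
	s->bytes += len;
	u64_stats_update_end(&s->syncp);
}

static void my_stats_read(const struct my_stats *s, u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {					/* reader retry loop */
		start = u64_stats_fetch_begin(&s->syncp);
		*packets = s->packets;
		*bytes = s->bytes;
	} while (u64_stats_fetch_retry(&s->syncp, start));
}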
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 47e5d374c7eb..afb18f198843 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -58,20 +58,28 @@
static __always_inline __must_check unsigned long
__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
{
- instrument_copy_from_user(to, from, n);
+ unsigned long res;
+
+ instrument_copy_from_user_before(to, from, n);
check_object_size(to, n, false);
- return raw_copy_from_user(to, from, n);
+ res = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, res);
+ return res;
}
static __always_inline __must_check unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
+ unsigned long res;
+
might_fault();
+ instrument_copy_from_user_before(to, from, n);
if (should_fail_usercopy())
return n;
- instrument_copy_from_user(to, from, n);
check_object_size(to, n, false);
- return raw_copy_from_user(to, from, n);
+ res = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, res);
+ return res;
}
/**
@@ -115,8 +123,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
unsigned long res = n;
might_fault();
if (!should_fail_usercopy() && likely(access_ok(from, n))) {
- instrument_copy_from_user(to, from, n);
+ instrument_copy_from_user_before(to, from, n);
res = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, res);
}
if (unlikely(res))
memset(to + (n - res), 0, res);
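
The hunks above split instrument_copy_from_user() into before/after hooks around
the raw copy. A hedged sketch of the ordering a new caller would follow; the
wrapper name is illustrative only:

static inline unsigned long
example_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	unsigned long res;

	instrument_copy_from_user_before(to, from, n);	   /* pre-access hook */
	res = raw_copy_from_user(to, from, n);		   /* res = bytes NOT copied */
	instrument_copy_from_user_after(to, from, n, res); /* report the uncopied tail */
	return res;
}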
diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h
index 22345391350b..2516082cd3a9 100644
--- a/include/linux/ucb1400.h
+++ b/include/linux/ucb1400.h
@@ -23,7 +23,7 @@
#include <sound/ac97_codec.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
/*
* UCB1400 AC-link registers
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 254a2654400f..a2892e151644 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -23,7 +23,9 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
return (struct udphdr *)skb_transport_header(skb);
}
+#define UDP_HTABLE_SIZE_MIN_PERNET 128
#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
+#define UDP_HTABLE_SIZE_MAX 65536
static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
{
@@ -70,6 +72,8 @@ struct udp_sock {
* For encapsulation sockets.
*/
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
+ void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload);
int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
void (*encap_destroy)(struct sock *sk);
@@ -86,6 +90,9 @@ struct udp_sock {
/* This field is dirtied by udp_recvmsg() */
int forward_deficit;
+
+ /* This field follows the rcvbuf value and is touched by udp_recvmsg */
+ int forward_threshold;
};
#define UDP_MAX_SEGMENTS (1 << 6UL)
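
A sketch of how a UDP tunnel driver might install the new encap_err_rcv hook;
the handler and setup function names are hypothetical:

static void my_tunnel_err_rcv(struct sock *sk, struct sk_buff *skb, int err,
			      __be16 port, u32 info, u8 *payload)
{
	/* e.g. propagate ICMP errors such as PMTU updates into tunnel state */
}

static void my_tunnel_sock_init(struct sock *sk)
{
	udp_sk(sk)->encap_err_rcv = my_tunnel_err_rcv;
}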
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 5896af36199c..9f158238edba 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -29,6 +29,9 @@ enum iter_type {
ITER_UBUF,
};
+#define ITER_SOURCE 1 // == WRITE
+#define ITER_DEST 0 // == READ
+
struct iov_iter_state {
size_t iov_offset;
size_t count;
@@ -247,8 +250,14 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
loff_t start, size_t count);
+ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
+ size_t maxsize, unsigned maxpages, size_t *start,
+ unsigned gup_flags);
ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
size_t maxsize, unsigned maxpages, size_t *start);
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+ struct page ***pages, size_t maxsize, size_t *start,
+ unsigned gup_flags);
ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
@@ -298,7 +307,7 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes)
shorted = iov_iter_count(i) - max_bytes;
iov_iter_truncate(i, max_bytes);
}
- npages = iov_iter_npages(i, INT_MAX);
+ npages = iov_iter_npages(i, maxpages);
if (shorted)
iov_iter_reexpand(i, iov_iter_count(i) + shorted);
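
ITER_DEST and ITER_SOURCE above name the iterator direction: ITER_DEST when data
will be copied into the iterator's buffers (a read), ITER_SOURCE when it is
consumed from them (a write). An illustrative sketch:

static void example_fill_iter(struct iov_iter *iter, struct kvec *kv,
			      void *buf, size_t len)
{
	kv->iov_base = buf;
	kv->iov_len = len;
	/* data will be copied into buf, so the iterator is a destination */
	iov_iter_kvec(iter, ITER_DEST, kv, 1, len);
}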
diff --git a/include/linux/umh.h b/include/linux/umh.h
index 244aff638220..5d1f6129b847 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -11,10 +11,11 @@
struct cred;
struct file;
-#define UMH_NO_WAIT 0 /* don't wait at all */
-#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */
-#define UMH_WAIT_PROC 2 /* wait for the process to complete */
-#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */
+#define UMH_NO_WAIT 0x00 /* don't wait at all */
+#define UMH_WAIT_EXEC 0x01 /* wait for the exec, but not the process */
+#define UMH_WAIT_PROC 0x02 /* wait for the process to complete */
+#define UMH_KILLABLE 0x04 /* wait for EXEC/PROC killable */
+#define UMH_FREEZABLE 0x08 /* wait for EXEC/PROC freezable */
struct subprocess_info {
struct work_struct work;
diff --git a/include/linux/units.h b/include/linux/units.h
index 681fc652e3d7..2793a41e73a2 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -20,6 +20,9 @@
#define PICO 1000000000000ULL
#define FEMTO 1000000000000000ULL
+#define NANOHZ_PER_HZ 1000000000UL
+#define MICROHZ_PER_HZ 1000000UL
+#define MILLIHZ_PER_HZ 1000UL
#define HZ_PER_KHZ 1000UL
#define KHZ_PER_MHZ 1000UL
#define HZ_PER_MHZ 1000000UL
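
A trivial sketch of the new frequency conversion factors; the helper name is
illustrative:

static inline unsigned long example_millihz_to_hz(unsigned long millihz)
{
	return millihz / MILLIHZ_PER_HZ;	/* e.g. 2500 mHz -> 2 Hz */
}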
diff --git a/include/linux/usb.h b/include/linux/usb.h
index f7a9914fc97f..d2d2f41052c0 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -575,6 +575,7 @@ struct usb3_lpm_parameters {
* @devaddr: device address, XHCI: assigned by HW, others: same as devnum
* @can_submit: URBs may be submitted
* @persist_enabled: USB_PERSIST enabled for this device
+ * @reset_in_progress: the device is being reset
* @have_langid: whether string_langid is valid
* @authorized: policy has said we can use it;
* (user space) policy determines if we authorize this device to be
@@ -662,6 +663,7 @@ struct usb_device {
unsigned can_submit:1;
unsigned persist_enabled:1;
+ unsigned reset_in_progress:1;
unsigned have_langid:1;
unsigned authorized:1;
unsigned authenticated:1;
@@ -1827,6 +1829,7 @@ static inline int usb_get_ptm_status(struct usb_device *dev, void *data)
extern int usb_string(struct usb_device *dev, int index,
char *buf, size_t size);
+extern char *usb_cache_string(struct usb_device *udev, int index);
/* wrappers that also update important state inside usbcore */
extern int usb_clear_halt(struct usb_device *dev, int pipe);
diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index edf3342507f1..ee38835ed77c 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -62,6 +62,7 @@ struct ci_hdrc_platform_data {
#define CI_HDRC_REQUIRES_ALIGNED_DMA BIT(13)
#define CI_HDRC_IMX_IS_HSIC BIT(14)
#define CI_HDRC_PMQOS BIT(15)
+#define CI_HDRC_PHY_VBUS_CONTROL BIT(16)
enum usb_dr_mode dr_mode;
#define CI_HDRC_CONTROLLER_RESET_EVENT 0
#define CI_HDRC_CONTROLLER_STOPPED_EVENT 1
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 67f8713d3fa3..78cd566ee238 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -479,7 +479,6 @@ static inline int ehset_single_step_set_feature(struct usb_hcd *hcd, int port)
struct pci_dev;
struct pci_device_id;
extern int usb_hcd_pci_probe(struct pci_dev *dev,
- const struct pci_device_id *id,
const struct hc_driver *driver);
extern void usb_hcd_pci_remove(struct pci_dev *dev);
extern void usb_hcd_pci_shutdown(struct pci_dev *dev);
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 8ea319f89e1f..f7bfedb740f5 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -276,8 +276,8 @@ struct usb_serial_driver {
unsigned int cmd, unsigned long arg);
void (*get_serial)(struct tty_struct *tty, struct serial_struct *ss);
int (*set_serial)(struct tty_struct *tty, struct serial_struct *ss);
- void (*set_termios)(struct tty_struct *tty,
- struct usb_serial_port *port, struct ktermios *old);
+ void (*set_termios)(struct tty_struct *tty, struct usb_serial_port *port,
+ const struct ktermios *old);
void (*break_ctl)(struct tty_struct *tty, int break_state);
unsigned int (*chars_in_buffer)(struct tty_struct *tty);
void (*wait_until_sent)(struct tty_struct *tty, long timeout);
diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h
index 20c0bedb8ec8..17657451c762 100644
--- a/include/linux/usb/tcpci.h
+++ b/include/linux/usb/tcpci.h
@@ -167,6 +167,11 @@
/* I2C_WRITE_BYTE_COUNT + 1 when TX_BUF_BYTE_x is only accessible via I2C_WRITE_BYTE_COUNT */
#define TCPC_TRANSMIT_BUFFER_MAX_LEN 31
+#define tcpc_presenting_rd(reg, cc) \
+ (!(TCPC_ROLE_CTRL_DRP & (reg)) && \
+ (((reg) & (TCPC_ROLE_CTRL_## cc ##_MASK << TCPC_ROLE_CTRL_## cc ##_SHIFT)) == \
+ (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_## cc ##_SHIFT)))
+
struct tcpci;
/*
@@ -207,4 +212,21 @@ irqreturn_t tcpci_irq(struct tcpci *tcpci);
struct tcpm_port;
struct tcpm_port *tcpci_get_tcpm_port(struct tcpci *tcpci);
+
+static inline enum typec_cc_status tcpci_to_typec_cc(unsigned int cc, bool sink)
+{
+ switch (cc) {
+ case 0x1:
+ return sink ? TYPEC_CC_RP_DEF : TYPEC_CC_RA;
+ case 0x2:
+ return sink ? TYPEC_CC_RP_1_5 : TYPEC_CC_RD;
+ case 0x3:
+ if (sink)
+ return TYPEC_CC_RP_3_0;
+ fallthrough;
+ case 0x0:
+ default:
+ return TYPEC_CC_OPEN;
+ }
+}
#endif /* __LINUX_USB_TCPCI_H */
diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h
index cfb916cccd31..8d09c2f0a9b8 100644
--- a/include/linux/usb/typec_dp.h
+++ b/include/linux/usb/typec_dp.h
@@ -73,6 +73,11 @@ enum {
#define DP_CAP_USB BIT(7)
#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8)
#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16)
+/* Get pin assignment taking plug & receptacle into consideration */
+#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \
+ DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_))
+#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \
+ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_))
/* DisplayPort Status Update VDO bits */
#define DP_STATUS_CONNECTION(_status_) ((_status_) & 3)
diff --git a/include/linux/user_events.h b/include/linux/user_events.h
index 736e05603463..592a3fbed98e 100644
--- a/include/linux/user_events.h
+++ b/include/linux/user_events.h
@@ -20,15 +20,6 @@
#define USER_EVENTS_SYSTEM "user_events"
#define USER_EVENTS_PREFIX "u:"
-/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */
-#define EVENT_BIT_FTRACE 0
-#define EVENT_BIT_PERF 1
-#define EVENT_BIT_OTHER 7
-
-#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE)
-#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF)
-#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER)
-
/* Create dynamic location entry within a 32-bit value */
#define DYN_LOC(offset, size) ((size) << 16 | (offset))
@@ -45,12 +36,12 @@ struct user_reg {
/* Input: Pointer to string with event name, description and flags */
__u64 name_args;
- /* Output: Byte index of the event within the status page */
- __u32 status_index;
+ /* Output: Bitwise index of the event within the status page */
+ __u32 status_bit;
/* Output: Index of the event to use when writing data */
__u32 write_index;
-};
+} __attribute__((__packed__));
#define DIAG_IOC_MAGIC '*'
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 33a4240e6a6f..45f09bec02c4 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -54,15 +54,17 @@ enum ucount_type {
UCOUNT_FANOTIFY_GROUPS,
UCOUNT_FANOTIFY_MARKS,
#endif
+ UCOUNT_COUNTS,
+};
+
+enum rlimit_type {
UCOUNT_RLIMIT_NPROC,
UCOUNT_RLIMIT_MSGQUEUE,
UCOUNT_RLIMIT_SIGPENDING,
UCOUNT_RLIMIT_MEMLOCK,
- UCOUNT_COUNTS,
+ UCOUNT_RLIMIT_COUNTS,
};
-#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC
-
struct user_namespace {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
@@ -99,6 +101,7 @@ struct user_namespace {
#endif
struct ucounts *ucounts;
long ucount_max[UCOUNT_COUNTS];
+ long rlimit_max[UCOUNT_RLIMIT_COUNTS];
} __randomize_layout;
struct ucounts {
@@ -107,6 +110,7 @@ struct ucounts {
kuid_t uid;
atomic_t count;
atomic_long_t ucount[UCOUNT_COUNTS];
+ atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS];
};
extern struct user_namespace init_user_ns;
@@ -120,21 +124,26 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
void put_ucounts(struct ucounts *ucounts);
-static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type)
+static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type)
{
- return atomic_long_read(&ucounts->ucount[type]);
+ return atomic_long_read(&ucounts->rlimit[type]);
}
-long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
-bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
-long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
-void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
-bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
+long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
+bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type);
+void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type);
+bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max);
+
+static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type)
+{
+ return READ_ONCE(ns->rlimit_max[type]);
+}
-static inline void set_rlimit_ucount_max(struct user_namespace *ns,
- enum ucount_type type, unsigned long max)
+static inline void set_userns_rlimit_max(struct user_namespace *ns,
+ enum rlimit_type type, unsigned long max)
{
- ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX;
+ ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX;
}
#ifdef CONFIG_USER_NS
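
A brief sketch of the renamed rlimit accounting helpers; the wrapper names are
illustrative:

static long example_charge_nproc(struct ucounts *ucounts)
{
	/* rlimit counters now live in their own enum and array */
	return inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_NPROC, 1);
}

static bool example_nproc_over_limit(struct ucounts *ucounts, unsigned long max)
{
	return is_rlimit_overlimit(ucounts, UCOUNT_RLIMIT_NPROC, max);
}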
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 732b522bacb7..9df0b9a762cc 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -73,6 +73,8 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
extern int mwriteprotect_range(struct mm_struct *dst_mm,
unsigned long start, unsigned long len,
bool enable_wp, atomic_t *mmap_changing);
+extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma,
+ unsigned long start, unsigned long len, bool enable_wp);
/* mm helpers */
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
@@ -144,9 +146,9 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
static inline bool vma_can_userfault(struct vm_area_struct *vma,
unsigned long vm_flags)
{
- if (vm_flags & VM_UFFD_MINOR)
- return is_vm_hugetlb_page(vma) || vma_is_shmem(vma);
-
+ if ((vm_flags & VM_UFFD_MINOR) &&
+ (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
+ return false;
#ifndef CONFIG_PTE_MARKER_UFFD_WP
/*
* If user requested uffd-wp but not enabled pte markers for
@@ -173,9 +175,8 @@ extern bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end);
-extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- struct list_head *uf);
+extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
+ unsigned long end, struct list_head *uf);
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf);
@@ -256,7 +257,7 @@ static inline bool userfaultfd_remove(struct vm_area_struct *vma,
return true;
}
-static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+static inline int userfaultfd_unmap_prep(struct mm_struct *mm,
unsigned long start, unsigned long end,
struct list_head *uf)
{
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 2b1737c9b244..bf7613ba412b 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -10,6 +10,7 @@
#include <uapi/linux/utsname.h>
enum uts_proc {
+ UTS_PROC_ARCH,
UTS_PROC_OSTYPE,
UTS_PROC_OSRELEASE,
UTS_PROC_VERSION,
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index d282f464d2f1..6d0f5e4e82c2 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -104,6 +104,7 @@ struct vdpa_iova_range {
};
struct vdpa_dev_set_config {
+ u64 device_features;
struct {
u8 mac[ETH_ALEN];
u16 mtu;
diff --git a/include/linux/verification.h b/include/linux/verification.h
index a655923335ae..f34e50ebcf60 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -17,6 +17,14 @@
#define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL)
#define VERIFY_USE_PLATFORM_KEYRING ((struct key *)2UL)
+static inline int system_keyring_id_check(u64 id)
+{
+ if (id > (unsigned long)VERIFY_USE_PLATFORM_KEYRING)
+ return -EINVAL;
+
+ return 0;
+}
+
/*
* The use to which an asymmetric key is being put.
*/
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e05ddc6fe6a5..a615542df1e0 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -14,8 +14,12 @@
#include <linux/workqueue.h>
#include <linux/poll.h>
#include <uapi/linux/vfio.h>
+#include <linux/iova_bitmap.h>
struct kvm;
+struct iommufd_ctx;
+struct iommufd_device;
+struct iommufd_access;
/*
* VFIO devices can be placed in a set, this allows all devices to share this
@@ -33,10 +37,11 @@ struct vfio_device {
struct device *dev;
const struct vfio_device_ops *ops;
/*
- * mig_ops is a static property of the vfio_device which must be set
- * prior to registering the vfio_device.
+ * mig_ops/log_ops are static properties of the vfio_device which must
+ * be set prior to registering the vfio_device.
*/
const struct vfio_migration_ops *mig_ops;
+ const struct vfio_log_ops *log_ops;
struct vfio_group *group;
struct vfio_device_set *dev_set;
struct list_head dev_set_list;
@@ -45,16 +50,26 @@ struct vfio_device {
struct kvm *kvm;
/* Members below here are private, not for driver use */
- refcount_t refcount;
+ unsigned int index;
+ struct device device; /* device.kref covers the object's life cycle */
+ refcount_t refcount; /* user count on the registered device */
unsigned int open_count;
struct completion comp;
struct list_head group_next;
struct list_head iommu_entry;
+ struct iommufd_access *iommufd_access;
+#if IS_ENABLED(CONFIG_IOMMUFD)
+ struct iommufd_device *iommufd_device;
+ struct iommufd_ctx *iommufd_ictx;
+ bool iommufd_attached;
+#endif
};
/**
* struct vfio_device_ops - VFIO bus driver device callbacks
*
+ * @init: initialize private fields in device structure
+ * @release: reclaim private fields in device structure
* @open_device: Called when the first file descriptor is opened for this device
* @close_device: Opposite of open_device
* @read: Perform read(2) on device file descriptor
@@ -72,6 +87,12 @@ struct vfio_device {
*/
struct vfio_device_ops {
char *name;
+ int (*init)(struct vfio_device *vdev);
+ void (*release)(struct vfio_device *vdev);
+ int (*bind_iommufd)(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id);
+ void (*unbind_iommufd)(struct vfio_device *vdev);
+ int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
int (*open_device)(struct vfio_device *vdev);
void (*close_device)(struct vfio_device *vdev);
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
@@ -88,6 +109,32 @@ struct vfio_device_ops {
void __user *arg, size_t argsz);
};
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
+int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+#else
+#define vfio_iommufd_physical_bind \
+ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
+ u32 *out_device_id)) NULL)
+#define vfio_iommufd_physical_unbind \
+ ((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_physical_attach_ioas \
+ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#define vfio_iommufd_emulated_bind \
+ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
+ u32 *out_device_id)) NULL)
+#define vfio_iommufd_emulated_unbind \
+ ((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_emulated_attach_ioas \
+ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#endif
+
/**
* @migration_set_state: Optional callback to change the migration state for
* devices that support migration. It's mandatory for
@@ -109,6 +156,28 @@ struct vfio_migration_ops {
};
/**
+ * @log_start: Optional callback to ask the device to start DMA logging.
+ * @log_stop: Optional callback to ask the device to stop DMA logging.
+ * @log_read_and_clear: Optional callback to ask the device to read
+ * and clear the dirty DMAs in some given range.
+ *
+ * The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set
+ * of features does not track logging state relative to the device,
+ * therefore the device implementation of vfio_log_ops must handle
+ * arbitrary user requests. This includes rejecting subsequent calls
+ * to log_start without an intervening log_stop, as well as graceful
+ * handling of log_stop and log_read_and_clear from invalid states.
+ */
+struct vfio_log_ops {
+ int (*log_start)(struct vfio_device *device,
+ struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
+ int (*log_stop)(struct vfio_device *device);
+ int (*log_read_and_clear)(struct vfio_device *device,
+ unsigned long iova, unsigned long length,
+ struct iova_bitmap *dirty);
+};
+
+/**
* vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
* @flags: Arg from the device_feature op
* @argsz: Arg from the device_feature op
@@ -137,14 +206,29 @@ static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops,
return 1;
}
-void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
- const struct vfio_device_ops *ops);
-void vfio_uninit_group_dev(struct vfio_device *device);
+struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
+ const struct vfio_device_ops *ops);
+#define vfio_alloc_device(dev_struct, member, dev, ops) \
+ container_of(_vfio_alloc_device(sizeof(struct dev_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof( \
+ struct dev_struct, member)), \
+ dev, ops), \
+ struct dev_struct, member)
+
+int vfio_init_device(struct vfio_device *device, struct device *dev,
+ const struct vfio_device_ops *ops);
+void vfio_free_device(struct vfio_device *device);
+static inline void vfio_put_device(struct vfio_device *device)
+{
+ put_device(&device->device);
+}
+
int vfio_register_group_dev(struct vfio_device *device);
int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);
int vfio_assign_device_set(struct vfio_device *device, void *set_id);
+unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set);
int vfio_mig_get_next_state(struct vfio_device *device,
enum vfio_device_mig_state cur_fsm,
@@ -155,6 +239,7 @@ int vfio_mig_get_next_state(struct vfio_device *device,
* External user API
*/
struct iommu_group *vfio_file_iommu_group(struct file *file);
+bool vfio_file_is_group(struct file *file);
bool vfio_file_enforced_coherent(struct file *file);
void vfio_file_set_kvm(struct file *file, struct kvm *kvm);
bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
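
With vfio_init_group_dev()/vfio_uninit_group_dev() gone, a driver embeds
struct vfio_device and allocates it through the new helper. A hedged sketch;
struct my_dev and my_ops are hypothetical, and the embedded member must sit at
offset 0, which the BUILD_BUG_ON_ZERO() in the macro enforces (it also keeps
IS_ERR() on the returned container pointer valid):

struct my_dev {
	struct vfio_device vdev;	/* must be the first member */
	void __iomem *regs;
};

static const struct vfio_device_ops my_ops;	/* .init/.release etc. defined elsewhere */

static int my_probe(struct device *dev)
{
	struct my_dev *md;
	int ret;

	md = vfio_alloc_device(my_dev, vdev, dev, &my_ops);
	if (IS_ERR(md))
		return PTR_ERR(md);

	ret = vfio_register_group_dev(&md->vdev);
	if (ret)
		vfio_put_device(&md->vdev);	/* drops the device reference */
	return ret;
}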
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 5579ece4347b..367fd79226a3 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -20,39 +20,10 @@
#define VFIO_PCI_CORE_H
#define VFIO_PCI_OFFSET_SHIFT 40
-
#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
-/* Special capability IDs predefined access */
-#define PCI_CAP_ID_INVALID 0xFF /* default raw access */
-#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */
-
-/* Cap maximum number of ioeventfds per device (arbitrary) */
-#define VFIO_PCI_IOEVENTFD_MAX 1000
-
-struct vfio_pci_ioeventfd {
- struct list_head next;
- struct vfio_pci_core_device *vdev;
- struct virqfd *virqfd;
- void __iomem *addr;
- uint64_t data;
- loff_t pos;
- int bar;
- int count;
- bool test_mem;
-};
-
-struct vfio_pci_irq_ctx {
- struct eventfd_ctx *trigger;
- struct virqfd *unmask;
- struct virqfd *mask;
- char *name;
- bool masked;
- struct irq_bypass_producer producer;
-};
-
struct vfio_pci_core_device;
struct vfio_pci_region;
@@ -78,23 +49,6 @@ struct vfio_pci_region {
u32 flags;
};
-struct vfio_pci_dummy_resource {
- struct resource resource;
- int index;
- struct list_head res_next;
-};
-
-struct vfio_pci_vf_token {
- struct mutex lock;
- uuid_t uuid;
- int users;
-};
-
-struct vfio_pci_mmap_vma {
- struct vm_area_struct *vma;
- struct list_head vma_next;
-};
-
struct vfio_pci_core_device {
struct vfio_device vdev;
struct pci_dev *pdev;
@@ -124,11 +78,14 @@ struct vfio_pci_core_device {
bool needs_reset;
bool nointx;
bool needs_pm_restore;
+ bool pm_intx_masked;
+ bool pm_runtime_engaged;
struct pci_saved_state *pci_saved_state;
struct pci_saved_state *pm_save;
int ioeventfds_nr;
struct eventfd_ctx *err_trigger;
struct eventfd_ctx *req_trigger;
+ struct eventfd_ctx *pm_wake_eventfd_ctx;
struct list_head dummy_resources_list;
struct mutex ioeventfds_lock;
struct list_head ioeventfds_list;
@@ -141,100 +98,17 @@ struct vfio_pci_core_device {
struct rw_semaphore memory_lock;
};
-#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
-#define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
-#define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
-#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev)))
-#define irq_is(vdev, type) (vdev->irq_type == type)
-
-void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev);
-void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev);
-
-int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev,
- uint32_t flags, unsigned index,
- unsigned start, unsigned count, void *data);
-
-ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev,
- char __user *buf, size_t count,
- loff_t *ppos, bool iswrite);
-
-ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
- size_t count, loff_t *ppos, bool iswrite);
-
-#ifdef CONFIG_VFIO_PCI_VGA
-ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
- size_t count, loff_t *ppos, bool iswrite);
-#else
-static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev,
- char __user *buf, size_t count,
- loff_t *ppos, bool iswrite)
-{
- return -EINVAL;
-}
-#endif
-
-long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
- uint64_t data, int count, int fd);
-
-int vfio_pci_init_perm_bits(void);
-void vfio_pci_uninit_perm_bits(void);
-
-int vfio_config_init(struct vfio_pci_core_device *vdev);
-void vfio_config_free(struct vfio_pci_core_device *vdev);
-
-int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev,
- unsigned int type, unsigned int subtype,
- const struct vfio_pci_regops *ops,
- size_t size, u32 flags, void *data);
-
-int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
- pci_power_t state);
-
-bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev);
-void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev);
-u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev);
-void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
- u16 cmd);
-
-#ifdef CONFIG_VFIO_PCI_IGD
-int vfio_pci_igd_init(struct vfio_pci_core_device *vdev);
-#else
-static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
-{
- return -ENODEV;
-}
-#endif
-
-#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
-int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
- struct vfio_info_cap *caps);
-int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
-void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
-#else
-static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
- struct vfio_info_cap *caps)
-{
- return -ENODEV;
-}
-
-static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
-{
- return 0;
-}
-
-static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
-{}
-#endif
-
/* Will be exported for vfio pci drivers usage */
+int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
+ unsigned int type, unsigned int subtype,
+ const struct vfio_pci_regops *ops,
+ size_t size, u32 flags, void *data);
void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga,
bool is_disable_idle_d3);
void vfio_pci_core_close_device(struct vfio_device *core_vdev);
-void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
- struct pci_dev *pdev,
- const struct vfio_device_ops *vfio_pci_ops);
+int vfio_pci_core_init_dev(struct vfio_device *core_vdev);
+void vfio_pci_core_release_dev(struct vfio_device *core_vdev);
int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev);
-void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev);
void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev);
extern const struct pci_error_handlers vfio_pci_core_err_handlers;
int vfio_pci_core_sriov_configure(struct vfio_pci_core_device *vdev,
@@ -256,9 +130,4 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
-static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
-{
- return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
-}
-
#endif /* VFIO_PCI_CORE_H */
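
With vfio_pci_core_init_device()/uninit_device() removed, a vfio-pci variant
driver instead points the new .init/.release callbacks at the core helpers.
A sketch with the ops table trimmed to the relevant fields; the remaining
callbacks stay driver specific:

static const struct vfio_device_ops my_vfio_pci_ops = {
	.name		= "my-vfio-pci",
	.init		= vfio_pci_core_init_dev,
	.release	= vfio_pci_core_release_dev,
	.close_device	= vfio_pci_core_close_device,
	/* .open_device, .read, .write, .mmap, .ioctl: driver specific */
};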
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index a3f73bb6733e..dcab9c7e8784 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -11,7 +11,7 @@
#include <linux/gfp.h>
/**
- * virtqueue - a queue to register buffers for sending or receiving.
+ * struct virtqueue - a queue to register buffers for sending or receiving.
* @list: the chain of virtqueues for this device
* @callback: the function to call when buffers are consumed (can be NULL).
* @name: the name of this virtqueue (mainly for debugging)
@@ -97,7 +97,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
/**
- * virtio_device - representation of a device using virtio
+ * struct virtio_device - representation of a device using virtio
* @index: unique position on the virtio bus
* @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
* @config_enabled: configuration change reporting enabled
@@ -156,7 +156,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
list_for_each_entry(vq, &vdev->vqs, list)
/**
- * virtio_driver - operations for a virtio I/O driver
+ * struct virtio_driver - operations for a virtio I/O driver
* @driver: underlying device driver (populate name and owner).
* @id_table: the ids serviced by this driver.
* @feature_table: an array of feature numbers supported by this driver.
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 6adff09f7170..4b517649cfe8 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -55,7 +55,6 @@ struct virtio_shm_region {
* include a NULL entry for vqs that do not need a callback
* names: array of virtqueue names (mainly for debugging)
* include a NULL entry for vqs unused by driver
- * sizes: array of virtqueue sizes
* Returns 0 on success or error status
* @del_vqs: free virtqueues found by find_vqs().
* @synchronize_cbs: synchronize with the virtqueue callbacks (optional)
@@ -104,9 +103,7 @@ struct virtio_config_ops {
void (*reset)(struct virtio_device *vdev);
int (*find_vqs)(struct virtio_device *, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[],
- u32 sizes[],
- const bool *ctx,
+ const char * const names[], const bool *ctx,
struct irq_affinity *desc);
void (*del_vqs)(struct virtio_device *);
void (*synchronize_cbs)(struct virtio_device *);
@@ -215,7 +212,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
const char *names[] = { n };
struct virtqueue *vq;
int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
- NULL, NULL);
+ NULL);
if (err < 0)
return ERR_PTR(err);
return vq;
@@ -227,8 +224,7 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
const char * const names[],
struct irq_affinity *desc)
{
- return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
- NULL, desc);
+ return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
}
static inline
@@ -237,25 +233,13 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
const char * const names[], const bool *ctx,
struct irq_affinity *desc)
{
- return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
- ctx, desc);
-}
-
-static inline
-int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs,
- struct virtqueue *vqs[],
- vq_callback_t *callbacks[],
- const char * const names[],
- u32 sizes[],
- const bool *ctx, struct irq_affinity *desc)
-{
- return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes,
- ctx, desc);
+ return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
+ desc);
}
/**
* virtio_synchronize_cbs - synchronize with virtqueue callbacks
- * @vdev: the device
+ * @dev: the virtio device
*/
static inline
void virtio_synchronize_cbs(struct virtio_device *dev)
@@ -274,7 +258,7 @@ void virtio_synchronize_cbs(struct virtio_device *dev)
/**
* virtio_device_ready - enable vq use in probe function
- * @vdev: the device
+ * @dev: the virtio device
*
* Driver must call this to use vqs in the probe function.
*
@@ -322,7 +306,7 @@ const char *virtio_bus_name(struct virtio_device *vdev)
/**
* virtqueue_set_affinity - setting affinity for a virtqueue
* @vq: the virtqueue
- * @cpu: the cpu no.
+ * @cpu_mask: the cpu mask
*
 * Note that this function is best-effort: the affinity hint may not be set
* due to config support, irq type and sharing.
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index a960de68ac69..bdf8de2cdd93 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -15,6 +15,7 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
case VIRTIO_NET_HDR_GSO_TCPV6:
return protocol == cpu_to_be16(ETH_P_IPV6);
case VIRTIO_NET_HDR_GSO_UDP:
+ case VIRTIO_NET_HDR_GSO_UDP_L4:
return protocol == cpu_to_be16(ETH_P_IP) ||
protocol == cpu_to_be16(ETH_P_IPV6);
default:
@@ -31,6 +32,7 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
case VIRTIO_NET_HDR_GSO_UDP:
+ case VIRTIO_NET_HDR_GSO_UDP_L4:
skb->protocol = cpu_to_be16(ETH_P_IP);
break;
case VIRTIO_NET_HDR_GSO_TCPV6:
@@ -69,6 +71,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
ip_proto = IPPROTO_UDP;
thlen = sizeof(struct udphdr);
break;
+ case VIRTIO_NET_HDR_GSO_UDP_L4:
+ gso_type = SKB_GSO_UDP_L4;
+ ip_proto = IPPROTO_UDP;
+ thlen = sizeof(struct udphdr);
+ break;
default:
return -EINVAL;
}
@@ -182,6 +189,8 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
else if (sinfo->gso_type & SKB_GSO_TCPV6)
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ else if (sinfo->gso_type & SKB_GSO_UDP_L4)
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
else
return -EINVAL;
if (sinfo->gso_type & SKB_GSO_TCP_ECN)
diff --git a/include/linux/virtio_pci_legacy.h b/include/linux/virtio_pci_legacy.h
index e5d665faf00e..a8dc757d0367 100644
--- a/include/linux/virtio_pci_legacy.h
+++ b/include/linux/virtio_pci_legacy.h
@@ -32,8 +32,6 @@ void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev,
u16 index, u32 queue_pfn);
bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev,
u16 idx);
-void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev,
- u16 idx, u16 size);
u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev,
u16 idx);
int vp_legacy_probe(struct virtio_pci_legacy_device *ldev);
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 404024486fa5..7f5d1caf5890 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -20,12 +20,19 @@
#define HIGHMEM_ZONE(xx)
#endif
-#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE
+#ifdef CONFIG_ZONE_DEVICE
+#define DEVICE_ZONE(xx) xx##_DEVICE,
+#else
+#define DEVICE_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, \
+ HIGHMEM_ZONE(xx) xx##_MOVABLE, DEVICE_ZONE(xx)
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
- FOR_ALL_ZONES(PGALLOC),
- FOR_ALL_ZONES(ALLOCSTALL),
- FOR_ALL_ZONES(PGSCAN_SKIP),
+ FOR_ALL_ZONES(PGALLOC)
+ FOR_ALL_ZONES(ALLOCSTALL)
+ FOR_ALL_ZONES(PGSCAN_SKIP)
PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
PGFAULT, PGMAJFAULT,
PGLAZYFREED,
@@ -33,10 +40,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGREUSE,
PGSTEAL_KSWAPD,
PGSTEAL_DIRECT,
+ PGSTEAL_KHUGEPAGED,
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
+ PGDEMOTE_KHUGEPAGED,
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
+ PGSCAN_KHUGEPAGED,
PGSCAN_DIRECT_THROTTLE,
PGSCAN_ANON,
PGSCAN_FILE,
@@ -122,10 +132,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
NR_TLB_LOCAL_FLUSH_ALL,
NR_TLB_LOCAL_FLUSH_ONE,
#endif /* CONFIG_DEBUG_TLBFLUSH */
-#ifdef CONFIG_DEBUG_VM_VMACACHE
- VMACACHE_FIND_CALLS,
- VMACACHE_FIND_HITS,
-#endif
#ifdef CONFIG_SWAP
SWAP_RA,
SWAP_RA_HIT,
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
deleted file mode 100644
index 6fce268a4588..000000000000
--- a/include/linux/vmacache.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_VMACACHE_H
-#define __LINUX_VMACACHE_H
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-
-static inline void vmacache_flush(struct task_struct *tsk)
-{
- memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas));
-}
-
-extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
-extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
- unsigned long addr);
-
-#ifndef CONFIG_MMU
-extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
- unsigned long start,
- unsigned long end);
-#endif
-
-static inline void vmacache_invalidate(struct mm_struct *mm)
-{
- mm->vmacache_seqnum++;
-}
-
-#endif /* __LINUX_VMACACHE_H */
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index bfe38869498d..19cf5b6892ce 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -125,12 +125,6 @@ static inline void vm_events_fold_cpu(int cpu)
#define count_vm_tlb_events(x, y) do { (void)(y); } while (0)
#endif
-#ifdef CONFIG_DEBUG_VM_VMACACHE
-#define count_vm_vmacache_event(x) count_vm_event(x)
-#else
-#define count_vm_vmacache_event(x) do {} while (0)
-#endif
-
#define __count_zid_vm_events(item, zid, delta) \
__count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta)
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index b5ab452fca5b..c1f5aebef170 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -30,7 +30,6 @@ struct vc_data *vc_deallocate(unsigned int console);
void reset_palette(struct vc_data *vc);
void do_blank_screen(int entering_gfx);
void do_unblank_screen(int leaving_gfx);
-void unblank_screen(void);
void poke_blanked_console(void);
int con_font_op(struct vc_data *vc, struct console_font_op *op);
int con_set_cmap(unsigned char __user *cmap);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 58cfbf81447c..a0307b516b09 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -209,7 +209,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
list_del(&wq_entry->entry);
}
-void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
unsigned int mode, void *key, wait_queue_entry_t *bookmark);
@@ -281,7 +281,7 @@ static inline void wake_up_pollfree(struct wait_queue_head *wq_head)
#define ___wait_is_interruptible(state) \
(!__builtin_constant_p(state) || \
- state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \
+ (state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
@@ -361,8 +361,8 @@ do { \
} while (0)
#define __wait_event_freezable(wq_head, condition) \
- ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \
- freezable_schedule())
+ ___wait_event(wq_head, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), \
+ 0, 0, schedule())
/**
* wait_event_freezable - sleep (or freeze) until a condition gets true
@@ -420,8 +420,8 @@ do { \
#define __wait_event_freezable_timeout(wq_head, condition, timeout) \
___wait_event(wq_head, ___wait_cond_timeout(condition), \
- TASK_INTERRUPTIBLE, 0, timeout, \
- __ret = freezable_schedule_timeout(__ret))
+ (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 0, timeout, \
+ __ret = schedule_timeout(__ret))
/*
* like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
@@ -642,8 +642,8 @@ do { \
#define __wait_event_freezable_exclusive(wq, condition) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
- freezable_schedule())
+ ___wait_event(wq, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 1, 0,\
+ schedule())
#define wait_event_freezable_exclusive(wq, condition) \
({ \
@@ -932,6 +932,34 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
__ret; \
})
+#define __wait_event_state(wq, condition, state) \
+ ___wait_event(wq, condition, state, 0, 0, schedule())
+
+/**
+ * wait_event_state - sleep until a condition gets true
+ * @wq_head: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @state: state to sleep in
+ *
+ * The process is put to sleep (@state) until the @condition evaluates to true
+ * or a signal is received (when allowed by @state). The @condition is checked
+ * each time the waitqueue @wq_head is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a signal
+ * (when allowed by @state) and 0 if @condition evaluated to true.
+ */
+#define wait_event_state(wq_head, condition, state) \
+({ \
+ int __ret = 0; \
+ might_sleep(); \
+ if (!(condition)) \
+ __ret = __wait_event_state(wq_head, condition, state); \
+ __ret; \
+})
+
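
An illustrative use of the new wait_event_state() helper above; the wait queue
and flag are placeholders:

static int example_wait_done(struct wait_queue_head *wq, bool *done)
{
	/* sleep killable and freezable until *done turns true */
	return wait_event_state(*wq, READ_ONCE(*done),
				TASK_KILLABLE | TASK_FREEZABLE);
}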
#define __wait_event_killable_timeout(wq_head, condition, timeout) \
___wait_event(wq_head, ___wait_cond_timeout(condition), \
TASK_KILLABLE, 0, timeout, \
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 7dec36aecbd9..7725b7579b78 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -71,7 +71,7 @@ static inline int
wait_on_bit(unsigned long *word, int bit, unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
bit_wait,
@@ -96,7 +96,7 @@ static inline int
wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
bit_wait_io,
@@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
unsigned long timeout)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit_timeout(word, bit,
bit_wait_timeout,
@@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit, action, mode);
}
diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 2d1b54556eff..e6e34d74dda0 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -26,7 +26,15 @@ struct compat_iw_point {
struct __compat_iw_event {
__u16 len; /* Real length of this stuff */
__u16 cmd; /* Wireless IOCTL */
- compat_caddr_t pointer;
+
+ union {
+ compat_caddr_t pointer;
+
+ /* we need ptr_bytes to make memcpy() run-time destination
+ * buffer bounds checking happy, nothing special
+ */
+ DECLARE_FLEX_ARRAY(__u8, ptr_bytes);
+ };
};
#define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer)
#define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length)
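
A sketch of what the ptr_bytes view above is for: the wext compat code copies
the raw event payload through the flexible-array member (same storage as
.pointer) so fortified memcpy() can see a destination bound. The function below
is illustrative and assumes the event lives in a larger stream buffer with room
for the payload:

static void example_copy_event_payload(struct __compat_iw_event *ev,
				       const void *payload, size_t len)
{
	memcpy(ev->ptr_bytes, payload, len);
}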
diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h
deleted file mode 100644
index 03d61f1d23ab..000000000000
--- a/include/linux/wl12xx.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * This file is part of wl12xx
- *
- * Copyright (C) 2009 Nokia Corporation
- *
- * Contact: Luciano Coelho <luciano.coelho@nokia.com>
- */
-
-#ifndef _LINUX_WL12XX_H
-#define _LINUX_WL12XX_H
-
-#include <linux/err.h>
-
-struct wl1251_platform_data {
- int power_gpio;
- /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */
- int irq;
- bool use_eeprom;
-};
-
-#ifdef CONFIG_WILINK_PLATFORM_DATA
-
-int wl1251_set_platform_data(const struct wl1251_platform_data *data);
-
-struct wl1251_platform_data *wl1251_get_platform_data(void);
-
-#else
-
-static inline
-int wl1251_set_platform_data(const struct wl1251_platform_data *data)
-{
- return -ENOSYS;
-}
-
-static inline
-struct wl1251_platform_data *wl1251_get_platform_data(void)
-{
- return ERR_PTR(-ENODATA);
-}
-
-#endif
-
-#endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3f045f6d6c4f..06f9291b6fd5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -17,20 +17,12 @@ struct bio;
DECLARE_PER_CPU(int, dirty_throttle_leaks);
/*
- * The 1/4 region under the global dirty thresh is for smooth dirty throttling:
- *
- * (thresh - thresh/DIRTY_FULL_SCOPE, thresh)
- *
- * Further beyond, all dirtier tasks will enter a loop waiting (possibly long
- * time) for the dirty pages to drop, unless written enough pages.
- *
* The global dirty threshold is normally equal to the global dirty limit,
* except when the system suddenly allocates a lot of anonymous memory and
* knocks down the global dirty threshold quickly, in which case the global
* dirty limit will follow down slowly to prevent livelocking all dirtier tasks.
*/
#define DIRTY_SCOPE 8
-#define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2)
struct backing_dev_info;
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 5ce2acf444fb..24d76500b1cc 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -15,6 +15,7 @@
* @WWAN_PORT_QMI: Qcom modem/MSM interface for modem control
* @WWAN_PORT_QCDM: Qcom Modem diagnostic interface
* @WWAN_PORT_FIREHOSE: XML based command protocol
+ * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems
*
* @WWAN_PORT_MAX: Highest supported port types
* @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type
@@ -26,6 +27,7 @@ enum wwan_port_type {
WWAN_PORT_QMI,
WWAN_PORT_QCDM,
WWAN_PORT_FIREHOSE,
+ WWAN_PORT_XMMRPC,
/* Add new port types above this line */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 979a9d3e5bfb..2e7dd44926e4 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -22,6 +22,12 @@
struct inode;
struct dentry;
+static inline bool is_posix_acl_xattr(const char *name)
+{
+ return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+ (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0);
+}
+
/*
* struct xattr_handler: When @name is set, match attributes with exactly that
* name. When @prefix is set instead, match attributes with that prefix and
@@ -61,16 +67,16 @@ int __vfs_setxattr_locked(struct user_namespace *, struct dentry *,
const char *, const void *, size_t, int,
struct inode **);
int vfs_setxattr(struct user_namespace *, struct dentry *, const char *,
- void *, size_t, int);
+ const void *, size_t, int);
int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
int __vfs_removexattr_locked(struct user_namespace *, struct dentry *,
const char *, struct inode **);
int vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
-ssize_t vfs_getxattr_alloc(struct user_namespace *mnt_userns,
- struct dentry *dentry, const char *name,
- char **xattr_value, size_t size, gfp_t flags);
+int vfs_getxattr_alloc(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *name,
+ char **xattr_value, size_t size, gfp_t flags);
int xattr_supported_namespace(struct inode *inode, const char *prefix);
@@ -80,48 +86,28 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
}
struct simple_xattrs {
- struct list_head head;
- spinlock_t lock;
+ struct rb_root rb_root;
+ rwlock_t lock;
};
struct simple_xattr {
- struct list_head list;
+ struct rb_node rb_node;
char *name;
size_t size;
char value[];
};
-/*
- * initialize the simple_xattrs structure
- */
-static inline void simple_xattrs_init(struct simple_xattrs *xattrs)
-{
- INIT_LIST_HEAD(&xattrs->head);
- spin_lock_init(&xattrs->lock);
-}
-
-/*
- * free all the xattrs
- */
-static inline void simple_xattrs_free(struct simple_xattrs *xattrs)
-{
- struct simple_xattr *xattr, *node;
-
- list_for_each_entry_safe(xattr, node, &xattrs->head, list) {
- kfree(xattr->name);
- kvfree(xattr);
- }
-}
-
+void simple_xattrs_init(struct simple_xattrs *xattrs);
+void simple_xattrs_free(struct simple_xattrs *xattrs);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
const void *value, size_t size, int flags,
ssize_t *removed_size);
-ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer,
- size_t size);
-void simple_xattr_list_add(struct simple_xattrs *xattrs,
- struct simple_xattr *new_xattr);
+ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
+ char *buffer, size_t size);
+void simple_xattr_add(struct simple_xattrs *xattrs,
+ struct simple_xattr *new_xattr);
#endif /* _LINUX_XATTR_H */
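
A short sketch of the simple_xattrs API as reshaped above (rbtree backed, with
simple_xattrs_init()/simple_xattrs_free() now out of line); the attribute name
and wrapper are illustrative:

static int example_store_xattr(struct simple_xattrs *xattrs,
			       const void *value, size_t size)
{
	/* flags 0: create the attribute or replace an existing one */
	return simple_xattr_set(xattrs, "user.example", value, size, 0, NULL);
}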
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 2a430e713ce5..a48cd0ffe57d 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -55,5 +55,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
unsigned long zs_get_total_pages(struct zs_pool *pool);
unsigned long zs_compact(struct zs_pool *pool);
+unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
+
void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
#endif