summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-02-26 21:20:47 +0300
committerJakub Kicinski <kuba@kernel.org>2026-03-05 23:11:05 +0300
commit0b1324cdd8de9f54f9daf689a4ae59783c333510 (patch)
tree991952409fe93bd0ea73f8683b497e31bfe6952f
parentab99e1167293726f016360a5dbbbc10386fd7d66 (diff)
parentabacaf559950eec0d99d37ff6b92049409af5943 (diff)
downloadlinux-0b1324cdd8de9f54f9daf689a4ae59783c333510.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-7.0-rc3). No conflicts. Adjacent changes: net/netfilter/nft_set_rbtree.c fb7fb4016300 ("netfilter: nf_tables: clone set on flush only") 3aea466a4399 ("netfilter: nft_set_rbtree: don't disable bh when acquiring tree lock") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--.mailmap11
-rw-r--r--CREDITS8
-rw-r--r--Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml1
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml4
-rw-r--r--Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml31
-rw-r--r--Documentation/netlink/specs/nfsd.yaml4
-rw-r--r--Documentation/virt/kvm/api.rst10
-rw-r--r--MAINTAINERS61
-rw-r--r--Makefile2
-rw-r--r--arch/arm64/include/asm/io.h26
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/asm/kvm_nested.h2
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h3
-rw-r--r--arch/arm64/include/asm/tlbflush.h63
-rw-r--r--arch/arm64/kernel/acpi.c2
-rw-r--r--arch/arm64/kernel/sys_compat.c2
-rw-r--r--arch/arm64/kernel/topology.c21
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/kvm/arm.c1
-rw-r--r--arch/arm64/kvm/at.c27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c37
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c8
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c10
-rw-r--r--arch/arm64/kvm/mmu.c12
-rw-r--r--arch/arm64/kvm/nested.c63
-rw-r--r--arch/arm64/kvm/sys_regs.c3
-rw-r--r--arch/arm64/lib/delay.c6
-rw-r--r--arch/arm64/mm/ioremap.c6
-rw-r--r--arch/arm64/mm/mmap.c12
-rw-r--r--arch/arm64/net/bpf_jit_comp.c2
-rw-r--r--arch/loongarch/kvm/Kconfig1
-rw-r--r--arch/loongarch/kvm/vm.c1
-rw-r--r--arch/mips/kvm/Kconfig1
-rw-r--r--arch/mips/kvm/mips.c1
-rw-r--r--arch/powerpc/kvm/Kconfig4
-rw-r--r--arch/powerpc/kvm/powerpc.c6
-rw-r--r--arch/riscv/kvm/Kconfig1
-rw-r--r--arch/riscv/kvm/vm.c1
-rw-r--r--arch/s390/include/asm/idle.h4
-rw-r--r--arch/s390/include/asm/vtime.h34
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/idle.c25
-rw-r--r--arch/s390/kernel/ipl.c2
-rw-r--r--arch/s390/kernel/irq.c20
-rw-r--r--arch/s390/kernel/vtime.c42
-rw-r--r--arch/s390/kvm/Kconfig2
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/mm/pfault.c4
-rw-r--r--arch/sparc/kernel/iommu.c2
-rw-r--r--arch/sparc/kernel/pci_sun4v.c2
-rw-r--r--arch/um/drivers/ubd_kern.c10
-rw-r--r--arch/x86/entry/entry_fred.c5
-rw-r--r--arch/x86/events/intel/uncore_snbep.c28
-rw-r--r--arch/x86/include/asm/bug.h6
-rw-r--r--arch/x86/include/asm/cfi.h12
-rw-r--r--arch/x86/include/asm/irqflags.h4
-rw-r--r--arch/x86/include/asm/linkage.h4
-rw-r--r--arch/x86/include/asm/percpu.h2
-rw-r--r--arch/x86/include/asm/runtime-const.h6
-rw-r--r--arch/x86/include/asm/traps.h2
-rw-r--r--arch/x86/kernel/alternative.c29
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/x86.c1
-rw-r--r--arch/x86/mm/extable.c7
-rw-r--r--arch/x86/net/bpf_jit_comp.c13
-rw-r--r--drivers/accel/amdxdna/aie2_ctx.c32
-rw-r--r--drivers/accel/amdxdna/aie2_message.c15
-rw-r--r--drivers/accel/amdxdna/aie2_pci.c36
-rw-r--r--drivers/accel/amdxdna/aie2_pm.c2
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.c24
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.c38
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.c3
-rw-r--r--drivers/accel/amdxdna/amdxdna_pm.c2
-rw-r--r--drivers/accel/amdxdna/amdxdna_pm.h11
-rw-r--r--drivers/accel/amdxdna/amdxdna_ubuf.c6
-rw-r--r--drivers/accel/amdxdna/npu1_regs.c2
-rw-r--r--drivers/accel/amdxdna/npu4_regs.c2
-rw-r--r--drivers/accel/amdxdna/npu5_regs.c2
-rw-r--r--drivers/accel/amdxdna/npu6_regs.c2
-rw-r--r--drivers/accel/ethosu/ethosu_gem.c2
-rw-r--r--drivers/acpi/osi.c13
-rw-r--r--drivers/acpi/sleep.c8
-rw-r--r--drivers/base/property.c27
-rw-r--r--drivers/block/drbd/drbd_actlog.c53
-rw-r--r--drivers/block/drbd/drbd_interval.h5
-rw-r--r--drivers/block/drbd/drbd_main.c14
-rw-r--r--drivers/block/drbd/drbd_receiver.c4
-rw-r--r--drivers/block/drbd/drbd_req.c3
-rw-r--r--drivers/block/zloop.c31
-rw-r--r--drivers/char/ipmi/ipmi_ipmb.c5
-rw-r--r--drivers/char/ipmi/ipmi_msghandler.c141
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c37
-rw-r--r--drivers/char/ipmi/ipmi_si_ls2k.c2
-rw-r--r--drivers/clk/imx/clk-imx8qxp.c24
-rw-r--r--drivers/clk/imx/clk-scu.c12
-rw-r--r--drivers/clk/imx/clk-scu.h2
-rw-r--r--drivers/cpufreq/intel_pstate.c14
-rw-r--r--drivers/cxl/core/core.h18
-rw-r--r--drivers/cxl/core/hdm.c2
-rw-r--r--drivers/cxl/core/mbox.c11
-rw-r--r--drivers/cxl/core/memdev.c13
-rw-r--r--drivers/cxl/core/pmem.c42
-rw-r--r--drivers/cxl/core/port.c52
-rw-r--r--drivers/cxl/core/region.c4
-rw-r--r--drivers/cxl/cxl.h7
-rw-r--r--drivers/cxl/pmem.c22
-rw-r--r--drivers/firewire/ohci.c2
-rw-r--r--drivers/gpio/gpiolib-shared.c6
-rw-r--r--drivers/gpio/gpiolib.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c4
-rw-r--r--drivers/gpu/drm/bridge/samsung-dsim.c23
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-dp.c4
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi86.c6
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c3
-rw-r--r--drivers/gpu/drm/drm_gpusvm.c10
-rw-r--r--drivers/gpu/drm/i915/display/intel_alpm.c7
-rw-r--r--drivers/gpu/drm/imx/ipuv3/parallel-display.c4
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_drm.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c12
-rw-r--r--drivers/gpu/drm/tiny/sharp-memory.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c9
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h6
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c66
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c30
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/core/cache.c13
-rw-r--r--drivers/infiniband/core/cma.c6
-rw-r--r--drivers/infiniband/core/core_priv.h3
-rw-r--r--drivers/infiniband/core/device.c34
-rw-r--r--drivers/infiniband/core/umem_dmabuf.c4
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dmabuf.c2
-rw-r--r--drivers/infiniband/hw/bng_re/bng_dev.c56
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c2
-rw-r--r--drivers/infiniband/hw/ionic/ionic_controlpath.c2
-rw-r--r--drivers/infiniband/hw/ionic/ionic_ibdev.c2
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c5
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c4
-rw-r--r--drivers/irqchip/irq-gic-v5-irs.c2
-rw-r--r--drivers/irqchip/irq-ls-extirq.c53
-rw-r--r--drivers/irqchip/irq-mmp.c2
-rw-r--r--drivers/irqchip/irq-sifive-plic.c7
-rw-r--r--drivers/media/dvb-core/dvb_net.c3
-rw-r--r--drivers/mmc/host/dw_mmc-rockchip.c38
-rw-r--r--drivers/mmc/host/mmci_qcom_dml.c1
-rw-r--r--drivers/mmc/host/sdhci-brcmstb.c2
-rw-r--r--drivers/net/bonding/bond_main.c9
-rw-r--r--drivers/net/bonding/bond_options.c2
-rw-r--r--drivers/net/can/dummy_can.c1
-rw-r--r--drivers/net/can/spi/mcp251x.c15
-rw-r--r--drivers/net/can/usb/ems_usb.c7
-rw-r--r--drivers/net/can/usb/esd_usb.c30
-rw-r--r--drivers/net/can/usb/etas_es58x/es58x_core.c8
-rw-r--r--drivers/net/can/usb/f81604.c45
-rw-r--r--drivers/net/can/usb/gs_usb.c22
-rw-r--r--drivers/net/can/usb/ucan.c2
-rw-r--r--drivers/net/dsa/realtek/rtl8365mb.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-common.h2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-drv.c10
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-main.c1
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe.h3
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c3
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c2
-rw-r--r--drivers/net/ethernet/intel/e1000e/defines.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/e1000.h4
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h2
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c31
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c15
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c41
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_trace.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c5
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c17
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c38
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c51
-rw-r--r--drivers/net/ethernet/intel/ice/ice_idc.c44
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c7
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c3
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c1
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c14
-rw-r--r--drivers/net/ethernet/intel/idpf/xdp.c6
-rw-r--r--drivers/net/ethernet/intel/idpf/xsk.c1
-rw-r--r--drivers/net/ethernet/intel/igb/igb_xsk.c38
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c34
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ptp.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/vf.c3
-rw-r--r--drivers/net/ethernet/intel/libeth/xsk.c1
-rw-r--r--drivers/net/ethernet/intel/libie/fwlog.c4
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.c40
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_rx.c27
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c38
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c28
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c15
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c23
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c53
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c60
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.c9
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.c8
-rw-r--r--drivers/net/netconsole.c2
-rw-r--r--drivers/net/usb/r8152.c1
-rw-r--r--drivers/net/vxlan/vxlan_core.c5
-rw-r--r--drivers/net/wireless/ath/ath12k/mac.c6
-rw-r--r--drivers/net/wireless/ath/ath12k/wmi.c36
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/mac.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mac.c1
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_mac80211.c2
-rw-r--r--drivers/net/wireless/st/cw1200/pm.c2
-rw-r--r--drivers/net/wireless/ti/wlcore/main.c4
-rw-r--r--drivers/pci/controller/dwc/pcie-designware-ep.c25
-rw-r--r--drivers/pinctrl/cirrus/pinctrl-cs42l43.c5
-rw-r--r--drivers/pinctrl/cix/pinctrl-sky1.c3
-rw-r--r--drivers/pinctrl/meson/pinctrl-amlogic-a4.c3
-rw-r--r--drivers/pinctrl/pinconf-generic.c4
-rw-r--r--drivers/pinctrl/pinctrl-amdisp.c2
-rw-r--r--drivers/pinctrl/pinctrl-cy8c95x0.c4
-rw-r--r--drivers/pinctrl/pinctrl-equilibrium.c31
-rw-r--r--drivers/pinctrl/pinctrl-rockchip.c12
-rw-r--r--drivers/pinctrl/qcom/pinctrl-qcs615.c1
-rw-r--r--drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c4
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sunxi.c51
-rw-r--r--drivers/pmdomain/imx/gpcv2.c4
-rw-r--r--drivers/regulator/Kconfig2
-rw-r--r--drivers/regulator/bq257xx-regulator.c3
-rw-r--r--drivers/regulator/fp9931.c7
-rw-r--r--drivers/regulator/tps65185.c3
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c36
-rw-r--r--drivers/scsi/lpfc/lpfc_sli4.h3
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_fw.c34
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.c5
-rw-r--r--drivers/scsi/ses.c5
-rw-r--r--drivers/scsi/snic/vnic_dev.c9
-rw-r--r--drivers/scsi/storvsc_drv.c5
-rw-r--r--drivers/spi/spi-stm32.c3
-rw-r--r--drivers/ufs/core/ufshcd.c47
-rw-r--r--fs/binfmt_elf.c3
-rw-r--r--fs/btrfs/block-group.c1
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/disk-io.c36
-rw-r--r--fs/btrfs/extent-tree.c8
-rw-r--r--fs/btrfs/inode.c19
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/relocation.c6
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/tree-checker.c4
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/iomap/buffered-io.c15
-rw-r--r--fs/iomap/direct-io.c15
-rw-r--r--fs/iomap/ioend.c13
-rw-r--r--fs/netfs/direct_write.c228
-rw-r--r--fs/netfs/internal.h4
-rw-r--r--fs/netfs/write_collect.c21
-rw-r--r--fs/netfs/write_issue.c41
-rw-r--r--fs/nfsd/nfsctl.c22
-rw-r--r--fs/nfsd/nfssvc.c7
-rw-r--r--fs/nsfs.c15
-rw-r--r--fs/smb/client/cached_dir.c2
-rw-r--r--fs/smb/client/cifs_fs_sb.h2
-rw-r--r--fs/smb/client/cifs_ioctl.h8
-rw-r--r--fs/smb/client/cifs_unicode.c14
-rw-r--r--fs/smb/client/cifs_unicode.h14
-rw-r--r--fs/smb/client/cifsacl.c17
-rw-r--r--fs/smb/client/cifsfs.c84
-rw-r--r--fs/smb/client/cifsglob.h61
-rw-r--r--fs/smb/client/connect.c80
-rw-r--r--fs/smb/client/dfs_cache.c2
-rw-r--r--fs/smb/client/dir.c53
-rw-r--r--fs/smb/client/file.c90
-rw-r--r--fs/smb/client/fs_context.c149
-rw-r--r--fs/smb/client/fs_context.h2
-rw-r--r--fs/smb/client/inode.c142
-rw-r--r--fs/smb/client/ioctl.c2
-rw-r--r--fs/smb/client/link.c14
-rw-r--r--fs/smb/client/misc.c16
-rw-r--r--fs/smb/client/readdir.c39
-rw-r--r--fs/smb/client/reparse.c27
-rw-r--r--fs/smb/client/reparse.h4
-rw-r--r--fs/smb/client/smb1ops.c22
-rw-r--r--fs/smb/client/smb2file.c2
-rw-r--r--fs/smb/client/smb2misc.c18
-rw-r--r--fs/smb/client/smb2ops.c8
-rw-r--r--fs/smb/client/smb2pdu.c35
-rw-r--r--fs/smb/client/transport.c21
-rw-r--r--fs/smb/client/xattr.c6
-rw-r--r--fs/smb/server/Kconfig1
-rw-r--r--fs/smb/server/auth.c4
-rw-r--r--fs/smb/server/smb2pdu.c5
-rw-r--r--fs/smb/server/transport_rdma.c4
-rw-r--r--fs/squashfs/cache.c3
-rw-r--r--fs/xfs/libxfs/xfs_ag.c28
-rw-r--r--fs/xfs/libxfs/xfs_ag.h3
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c4
-rw-r--r--fs/xfs/libxfs/xfs_metafile.c5
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h52
-rw-r--r--fs/xfs/libxfs/xfs_sb.c3
-rw-r--r--fs/xfs/scrub/dir_repair.c2
-rw-r--r--fs/xfs/scrub/orphanage.c7
-rw-r--r--fs/xfs/xfs_fsops.c17
-rw-r--r--fs/xfs/xfs_health.c20
-rw-r--r--fs/xfs/xfs_healthmon.c11
-rw-r--r--fs/xfs/xfs_icache.c18
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_notify_failure.c4
-rw-r--r--fs/xfs/xfs_platform.h9
-rw-r--r--fs/xfs/xfs_rtalloc.c44
-rw-r--r--fs/xfs/xfs_stats.c17
-rw-r--r--fs/xfs/xfs_stats.h19
-rw-r--r--fs/xfs/xfs_super.c4
-rw-r--r--fs/xfs/xfs_verify_media.c4
-rw-r--r--fs/xfs/xfs_zone_alloc.c6
-rw-r--r--fs/xfs/xfs_zone_gc.c10
-rw-r--r--include/linux/bpf.h4
-rw-r--r--include/linux/bpf_local_storage.h2
-rw-r--r--include/linux/bpf_mem_alloc.h6
-rw-r--r--include/linux/gfp.h7
-rw-r--r--include/linux/gfp_types.h2
-rw-r--r--include/linux/indirect_call_wrapper.h18
-rw-r--r--include/linux/irqchip/arm-gic-v3.h1
-rw-r--r--include/linux/jiffies.h40
-rw-r--r--include/linux/kthread.h21
-rw-r--r--include/linux/kvm_host.h7
-rw-r--r--include/linux/liveupdate.h9
-rw-r--r--include/linux/mmc/host.h9
-rw-r--r--include/linux/netdevice.h27
-rw-r--r--include/linux/ns_common.h2
-rw-r--r--include/linux/overflow.h8
-rw-r--r--include/linux/pm_runtime.h16
-rw-r--r--include/linux/ring_buffer.h1
-rw-r--r--include/linux/rseq.h12
-rw-r--r--include/linux/rseq_entry.h8
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/slab.h12
-rw-r--r--include/linux/tnum.h3
-rw-r--r--include/linux/uaccess.h54
-rw-r--r--include/linux/usb/r8152.h1
-rw-r--r--include/net/act_api.h1
-rw-r--r--include/net/bonding.h1
-rw-r--r--include/net/inet6_hashtables.h2
-rw-r--r--include/net/inet_hashtables.h2
-rw-r--r--include/net/ip.h2
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/libeth/xsk.h3
-rw-r--r--include/net/netfilter/nf_tables.h7
-rw-r--r--include/net/sch_generic.h10
-rw-r--r--include/net/secure_seq.h45
-rw-r--r--include/net/tc_act/tc_gate.h33
-rw-r--r--include/net/tc_act/tc_ife.h4
-rw-r--r--include/net/tcp.h6
-rw-r--r--include/net/xdp_sock_drv.h16
-rw-r--r--include/rdma/rdma_cm.h2
-rw-r--r--include/trace/events/kmem.h8
-rw-r--r--include/trace/events/netfs.h4
-rw-r--r--include/uapi/drm/drm_fourcc.h12
-rw-r--r--include/uapi/linux/pci_regs.h2
-rw-r--r--include/uapi/linux/rseq.h26
-rw-r--r--init/Kconfig2
-rw-r--r--io_uring/cmd_net.c2
-rw-r--r--io_uring/timeout.c4
-rw-r--r--kernel/bpf/arena.c2
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/bloom_filter.c2
-rw-r--r--kernel/bpf/bpf_insn_array.c2
-rw-r--r--kernel/bpf/bpf_local_storage.c75
-rw-r--r--kernel/bpf/cpumap.c17
-rw-r--r--kernel/bpf/devmap.c47
-rw-r--r--kernel/bpf/hashtab.c86
-rw-r--r--kernel/bpf/local_storage.c2
-rw-r--r--kernel/bpf/lpm_trie.c2
-rw-r--r--kernel/bpf/memalloc.c58
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/bpf/tnum.c56
-rw-r--r--kernel/bpf/verifier.c30
-rw-r--r--kernel/cgroup/cgroup.c1
-rw-r--r--kernel/cgroup/cpuset.c222
-rw-r--r--kernel/dma/direct.h2
-rw-r--r--kernel/events/core.c83
-rw-r--r--kernel/exit.c6
-rw-r--r--kernel/kcsan/kcsan_test.c2
-rw-r--r--kernel/kthread.c41
-rw-r--r--kernel/liveupdate/luo_file.c41
-rw-r--r--kernel/module/Kconfig23
-rw-r--r--kernel/module/main.c13
-rw-r--r--kernel/nscommon.c6
-rw-r--r--kernel/nstree.c29
-rw-r--r--kernel/rseq.c8
-rw-r--r--kernel/sched/core.c1
-rw-r--r--kernel/sched/ext.c105
-rw-r--r--kernel/sched/ext_idle.c5
-rw-r--r--kernel/sched/ext_internal.h2
-rw-r--r--kernel/sched/fair.c150
-rw-r--r--kernel/sched/isolation.c4
-rw-r--r--kernel/sched/sched.h11
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/time/time.c19
-rw-r--r--kernel/time/timer_migration.c4
-rw-r--r--kernel/trace/bpf_trace.c4
-rw-r--r--kernel/trace/ftrace.c2
-rw-r--r--kernel/trace/ring_buffer.c21
-rw-r--r--kernel/trace/trace.c13
-rw-r--r--kernel/trace/trace_events.c54
-rw-r--r--kernel/trace/trace_functions_graph.c19
-rw-r--r--lib/Kconfig.debug5
-rw-r--r--lib/debugobjects.c19
-rw-r--r--mm/damon/core.c3
-rw-r--r--mm/huge_memory.c3
-rw-r--r--mm/kfence/core.c29
-rw-r--r--mm/memfd_luo.c7
-rw-r--r--mm/mm_init.c6
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/slab.h4
-rw-r--r--mm/slub.c51
-rw-r--r--net/atm/lec.c26
-rw-r--r--net/batman-adv/bat_v_elp.c10
-rw-r--r--net/batman-adv/hard-interface.c8
-rw-r--r--net/batman-adv/hard-interface.h1
-rw-r--r--net/bridge/br_device.c2
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_private.h10
-rw-r--r--net/bridge/br_vlan_options.c26
-rw-r--r--net/can/bcm.c1
-rw-r--r--net/core/dev.c24
-rw-r--r--net/core/devmem.c6
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/secure_seq.c80
-rw-r--r--net/core/skmsg.c14
-rw-r--r--net/ipv4/Kconfig2
-rw-r--r--net/ipv4/inet_hashtables.c8
-rw-r--r--net/ipv4/syncookies.c11
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_ao.c3
-rw-r--r--net/ipv4/tcp_bpf.c2
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_input.c38
-rw-r--r--net/ipv4/tcp_ipv4.c40
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/udp.c27
-rw-r--r--net/ipv4/udp_bpf.c2
-rw-r--r--net/ipv6/inet6_hashtables.c3
-rw-r--r--net/ipv6/route.c11
-rw-r--r--net/ipv6/syncookies.c11
-rw-r--r--net/ipv6/tcp_ipv6.c40
-rw-r--r--net/mac80211/eht.c1
-rw-r--r--net/mptcp/pm.c55
-rw-r--r--net/mptcp/pm_kernel.c9
-rw-r--r--net/netfilter/nf_tables_api.c45
-rw-r--r--net/netfilter/nft_set_hash.c1
-rw-r--r--net/netfilter/nft_set_pipapo.c62
-rw-r--r--net/netfilter/nft_set_pipapo.h2
-rw-r--r--net/netfilter/nft_set_rbtree.c8
-rw-r--r--net/nfc/digital_core.c8
-rw-r--r--net/nfc/nci/core.c30
-rw-r--r--net/nfc/nci/data.c12
-rw-r--r--net/nfc/rawsock.c11
-rw-r--r--net/rds/tcp.c14
-rw-r--r--net/sched/act_ct.c6
-rw-r--r--net/sched/act_gate.c265
-rw-r--r--net/sched/act_ife.c93
-rw-r--r--net/sched/cls_api.c7
-rw-r--r--net/sched/sch_cake.c53
-rw-r--r--net/sched/sch_ets.c12
-rw-r--r--net/sched/sch_fq.c1
-rw-r--r--net/unix/af_unix.c8
-rw-r--r--net/xdp/xsk.c26
-rw-r--r--rust/kernel/io.rs131
-rw-r--r--rust/kernel/pci/io.rs24
-rw-r--r--sound/hda/codecs/realtek/alc269.c37
-rw-r--r--sound/hda/codecs/side-codecs/cs35l56_hda.c7
-rw-r--r--sound/hda/controllers/intel.c2
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c7
-rw-r--r--sound/soc/codecs/cs42l43-jack.c1
-rw-r--r--sound/soc/sdca/sdca_interrupts.c4
-rw-r--r--sound/usb/endpoint.c10
-rw-r--r--sound/usb/format.c70
-rw-r--r--sound/usb/mixer_s1810c.c12
-rw-r--r--sound/usb/mixer_scarlett2.c10
-rw-r--r--sound/usb/qcom/qc_audio_offload.c2
-rw-r--r--sound/usb/quirks.c6
-rw-r--r--sound/usb/stream.c3
-rw-r--r--sound/usb/usbaudio.h6
-rw-r--r--sound/usb/usx2y/us144mkii.c14
-rw-r--r--sound/usb/usx2y/us144mkii_capture.c12
-rw-r--r--sound/usb/usx2y/us144mkii_controls.c42
-rw-r--r--sound/usb/usx2y/us144mkii_midi.c22
-rw-r--r--sound/usb/usx2y/us144mkii_playback.c10
-rw-r--r--sound/usb/validate.c2
-rw-r--r--tools/bpf/resolve_btfids/Makefile7
-rw-r--r--tools/bpf/resolve_btfids/main.c81
-rw-r--r--tools/include/linux/args.h4
-rw-r--r--tools/sched_ext/Kconfig61
-rw-r--r--tools/sched_ext/Makefile2
-rw-r--r--tools/sched_ext/README.md6
-rw-r--r--tools/sched_ext/include/scx/compat.h7
-rw-r--r--tools/sched_ext/scx_central.c2
-rw-r--r--tools/sched_ext/scx_sdt.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c76
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.asan3
-rw-r--r--tools/testing/selftests/bpf/Makefile13
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c14
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h45
-rw-r--r--tools/testing/selftests/bpf/bpftool_helpers.c25
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/jit_disasm_helpers.c18
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_update.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr_race.c218
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_sysctl.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier_log.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c4
-rw-r--r--tools/testing/selftests/bpf/progs/dmabuf_iter.c2
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_race.c197
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c137
-rw-r--r--tools/testing/selftests/bpf/test_progs.c38
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c2
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c1
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h11
-rw-r--r--tools/testing/selftests/bpf/veristat.c2
-rw-r--r--tools/testing/selftests/bpf/xdp_features.c3
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c4
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh224
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c25
-rw-r--r--tools/testing/selftests/kselftest_harness.h7
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/bridge_vlan_dump.sh204
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh11
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh49
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh11
-rw-r--r--tools/testing/selftests/net/netfilter/nf_queue.c10
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rw-r--r--tools/testing/selftests/net/tun.c12
-rw-r--r--tools/testing/selftests/sched_ext/Makefile2
-rw-r--r--tools/testing/selftests/sched_ext/init_enable_count.c3
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.bpf.c4
-rw-r--r--tools/testing/selftests/sched_ext/rt_stall.c1
-rw-r--r--tools/testing/selftests/sched_ext/runner.c3
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ct.json159
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json99
-rw-r--r--tools/testing/selftests/tc-testing/tdc_helper.py10
-rw-r--r--virt/kvm/Kconfig9
-rw-r--r--virt/kvm/kvm_main.c17
591 files changed, 7278 insertions, 3295 deletions
diff --git a/.mailmap b/.mailmap
index e1cf6bb85d33..628fd86d6a23 100644
--- a/.mailmap
+++ b/.mailmap
@@ -210,7 +210,12 @@ Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch>
Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
+Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@linaro.org>
+Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@free.fr>
+Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@linexp.org>
+Daniel Lezcano <daniel.lezcano@kernel.org> <dlezcano@fr.ibm.com>
Daniel Thompson <danielt@kernel.org> <daniel.thompson@linaro.org>
+Daniele Alessandrelli <daniele.alessandrelli@gmail.com> <daniele.alessandrelli@intel.com>
Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com>
David Brownell <david-b@pacbell.net>
David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
@@ -348,6 +353,7 @@ Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@opinsys.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
+Jason Xing <kerneljasonxing@gmail.com> <kernelxing@tencent.com>
<javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
Jayachandran C <c.jayachandran@gmail.com> <jayachandranc@netlogicmicro.com>
@@ -396,6 +402,7 @@ Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org>
+Joe Damato <joe@dama.to> <jdamato@fastly.com>
Joel Granados <joel.granados@kernel.org> <j.granados@samsung.com>
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
@@ -876,6 +883,7 @@ Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
+Vlastimil Babka <vbabka@kernel.org> <vbabka@suse.cz>
WangYuli <wangyuli@aosc.io> <wangyl5933@chinaunicom.cn>
WangYuli <wangyuli@aosc.io> <wangyuli@deepin.org>
Weiwen Hu <huweiwen@linux.alibaba.com> <sehuww@mail.scut.edu.cn>
@@ -890,7 +898,8 @@ Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com>
Yixun Lan <dlan@kernel.org> <dlan@gentoo.org>
Yixun Lan <dlan@kernel.org> <yixun.lan@amlogic.com>
-Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com>
+Yosry Ahmed <yosry@kernel.org> <yosryahmed@google.com>
+Yosry Ahmed <yosry@kernel.org> <yosry.ahmed@linux.dev>
Yu-Chun Lin <eleanor.lin@realtek.com> <eleanor15x@gmail.com>
Yusuke Goda <goda.yusuke@renesas.com>
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
diff --git a/CREDITS b/CREDITS
index d74c8b2b7ed3..9091bac3d2da 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1242,6 +1242,10 @@ N: Veaceslav Falico
E: vfalico@gmail.com
D: Co-maintainer and co-author of the network bonding driver.
+N: Thomas Falcon
+E: tlfalcon@linux.ibm.com
+D: Initial author of the IBM ibmvnic network driver
+
N: János Farkas
E: chexum@shadow.banki.hu
D: romfs, various (mostly networking) fixes
@@ -2415,6 +2419,10 @@ S: Am Muehlenweg 38
S: D53424 Remagen
S: Germany
+N: Jonathan Lemon
+E: jonathan.lemon@gmail.com
+D: OpenCompute PTP clock driver (ptp_ocp)
+
N: Colin Leroy
E: colin@colino.net
W: http://www.geekounet.org/
diff --git a/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml b/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml
index ec0c2168e4b9..6bcfff970117 100644
--- a/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml
+++ b/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml
@@ -87,6 +87,7 @@ required:
allOf:
- $ref: can-controller.yaml#
+ - $ref: /schemas/memory-controllers/mc-peripheral-props.yaml
- if:
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml b/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml
index d6b3b5a5c0b3..fe4ac9350ba0 100644
--- a/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml
@@ -287,7 +287,7 @@ examples:
regulator-max-microvolt = <1700000>;
};
mt6359_vrfck_1_ldo_reg: ldo_vrfck_1 {
- regulator-name = "vrfck";
+ regulator-name = "vrfck_1";
regulator-min-microvolt = <1240000>;
regulator-max-microvolt = <1600000>;
};
@@ -309,7 +309,7 @@ examples:
regulator-max-microvolt = <3300000>;
};
mt6359_vemc_1_ldo_reg: ldo_vemc_1 {
- regulator-name = "vemc";
+ regulator-name = "vemc_1";
regulator-min-microvolt = <2500000>;
regulator-max-microvolt = <3300000>;
};
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
index 81838577cf9c..8ebebcebca16 100644
--- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
+++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
@@ -26,21 +26,6 @@ allOf:
properties:
compatible:
contains:
- enum:
- - baikal,bt1-sys-ssi
- then:
- properties:
- mux-controls:
- maxItems: 1
- required:
- - mux-controls
- else:
- required:
- - interrupts
- - if:
- properties:
- compatible:
- contains:
const: amd,pensando-elba-spi
then:
required:
@@ -75,10 +60,6 @@ properties:
const: intel,mountevans-imc-ssi
- description: AMD Pensando Elba SoC SPI Controller
const: amd,pensando-elba-spi
- - description: Baikal-T1 SPI Controller
- const: baikal,bt1-ssi
- - description: Baikal-T1 System Boot SPI Controller
- const: baikal,bt1-sys-ssi
- description: Canaan Kendryte K210 SoS SPI Controller
const: canaan,k210-spi
- description: Renesas RZ/N1 SPI Controller
@@ -170,6 +151,7 @@ required:
- "#address-cells"
- "#size-cells"
- clocks
+ - interrupts
examples:
- |
@@ -190,15 +172,4 @@ examples:
rx-sample-delay-ns = <7>;
};
};
- - |
- spi@1f040100 {
- compatible = "baikal,bt1-sys-ssi";
- reg = <0x1f040100 0x900>,
- <0x1c000000 0x1000000>;
- #address-cells = <1>;
- #size-cells = <0>;
- mux-controls = <&boot_mux>;
- clocks = <&ccu_sys>;
- clock-names = "ssi_clk";
- };
...
diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml
index badb2fe57c98..f87b5a05e5e9 100644
--- a/Documentation/netlink/specs/nfsd.yaml
+++ b/Documentation/netlink/specs/nfsd.yaml
@@ -152,7 +152,7 @@ operations:
- compound-ops
-
name: threads-set
- doc: set the number of running threads
+ doc: set the maximum number of running threads
attribute-set: server
flags: [admin-perm]
do:
@@ -165,7 +165,7 @@ operations:
- min-threads
-
name: threads-get
- doc: get the number of running threads
+ doc: get the maximum number of running threads
attribute-set: server
do:
reply:
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index fc5736839edd..6f85e1b321dd 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1396,7 +1396,10 @@ or its flags may be modified, but it may not be resized.
Memory for the region is taken starting at the address denoted by the
field userspace_addr, which must point at user addressable memory for
the entire memory slot size. Any object may back this memory, including
-anonymous memory, ordinary files, and hugetlbfs.
+anonymous memory, ordinary files, and hugetlbfs. Changes in the backing
+of the memory region are automatically reflected into the guest.
+For example, an mmap() that affects the region will be made visible
+immediately. Another example is madvise(MADV_DROP).
On architectures that support a form of address tagging, userspace_addr must
be an untagged address.
@@ -1412,11 +1415,6 @@ use it. The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
to make a new slot read-only. In this case, writes to this memory will be
posted to userspace as KVM_EXIT_MMIO exits.
-When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
-the memory region are automatically reflected into the guest. For example, an
-mmap() that affects the region will be made visible immediately. Another
-example is madvise(MADV_DROP).
-
For TDX guest, deleting/moving memory region loses guest memory contents.
Read only region isn't supported. Only as-id 0 is supported.
diff --git a/MAINTAINERS b/MAINTAINERS
index 2265e2c9bfbe..7b277d5bf3d1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -993,10 +993,8 @@ F: Documentation/devicetree/bindings/thermal/amazon,al-thermal.yaml
F: drivers/thermal/thermal_mmio.c
AMAZON ETHERNET DRIVERS
-M: Shay Agroskin <shayagr@amazon.com>
M: Arthur Kiyanovski <akiyano@amazon.com>
-R: David Arinzon <darinzon@amazon.com>
-R: Saeed Bishara <saeedb@amazon.com>
+M: David Arinzon <darinzon@amazon.com>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst
@@ -4617,7 +4615,6 @@ F: drivers/bluetooth/
BLUETOOTH SUBSYSTEM
M: Marcel Holtmann <marcel@holtmann.org>
-M: Johan Hedberg <johan.hedberg@gmail.com>
M: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
L: linux-bluetooth@vger.kernel.org
S: Supported
@@ -6212,6 +6209,7 @@ F: drivers/scsi/fnic/
CISCO SCSI HBA DRIVER
M: Karan Tilak Kumar <kartilak@cisco.com>
+M: Narsimhulu Musini <nmusini@cisco.com>
M: Sesidhar Baddela <sebaddel@cisco.com>
L: linux-scsi@vger.kernel.org
S: Supported
@@ -6278,7 +6276,7 @@ S: Maintained
F: include/linux/clk.h
CLOCKSOURCE, CLOCKEVENT DRIVERS
-M: Daniel Lezcano <daniel.lezcano@linaro.org>
+M: Daniel Lezcano <daniel.lezcano@kernel.org>
M: Thomas Gleixner <tglx@kernel.org>
L: linux-kernel@vger.kernel.org
S: Supported
@@ -6667,7 +6665,7 @@ F: rust/kernel/cpu.rs
CPU IDLE TIME MANAGEMENT FRAMEWORK
M: "Rafael J. Wysocki" <rafael@kernel.org>
-M: Daniel Lezcano <daniel.lezcano@linaro.org>
+M: Daniel Lezcano <daniel.lezcano@kernel.org>
R: Christian Loehle <christian.loehle@arm.com>
L: linux-pm@vger.kernel.org
S: Maintained
@@ -6697,7 +6695,7 @@ F: arch/x86/kernel/msr.c
CPUIDLE DRIVER - ARM BIG LITTLE
M: Lorenzo Pieralisi <lpieralisi@kernel.org>
-M: Daniel Lezcano <daniel.lezcano@linaro.org>
+M: Daniel Lezcano <daniel.lezcano@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
@@ -6705,7 +6703,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
F: drivers/cpuidle/cpuidle-big_little.c
CPUIDLE DRIVER - ARM EXYNOS
-M: Daniel Lezcano <daniel.lezcano@linaro.org>
+M: Daniel Lezcano <daniel.lezcano@kernel.org>
M: Kukjin Kim <kgene@kernel.org>
R: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
@@ -10170,8 +10168,8 @@ F: drivers/i2c/busses/i2c-cpm.c
FREESCALE IMX / MXC FEC DRIVER
M: Wei Fang <wei.fang@nxp.com>
+R: Frank Li <frank.li@nxp.com>
R: Shenwei Wang <shenwei.wang@nxp.com>
-R: Clark Wang <xiaoning.wang@nxp.com>
L: imx@lists.linux.dev
L: netdev@vger.kernel.org
S: Maintained
@@ -10483,7 +10481,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
F: Documentation/trace/ftrace*
F: arch/*/*/*/*ftrace*
F: arch/*/*/*ftrace*
-F: include/*/ftrace.h
+F: include/*/*ftrace*
F: kernel/trace/fgraph.c
F: kernel/trace/ftrace*
F: samples/ftrace
@@ -12215,7 +12213,6 @@ IBM Power SRIOV Virtual NIC Device Driver
M: Haren Myneni <haren@linux.ibm.com>
M: Rick Lindsley <ricklind@linux.ibm.com>
R: Nick Child <nnac123@linux.ibm.com>
-R: Thomas Falcon <tlfalcon@linux.ibm.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/ibm/ibmvnic.*
@@ -15374,10 +15371,8 @@ F: drivers/crypto/marvell/
F: include/linux/soc/marvell/octeontx2/
MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2)
-M: Mirko Lindner <mlindner@marvell.com>
-M: Stephen Hemminger <stephen@networkplumber.org>
L: netdev@vger.kernel.org
-S: Odd fixes
+S: Orphan
F: drivers/net/ethernet/marvell/sk*
MARVELL LIBERTAS WIRELESS DRIVER
@@ -15474,7 +15469,6 @@ MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER
M: Sunil Goutham <sgoutham@marvell.com>
M: Linu Cherian <lcherian@marvell.com>
M: Geetha sowjanya <gakula@marvell.com>
-M: Jerin Jacob <jerinj@marvell.com>
M: hariprasad <hkelam@marvell.com>
M: Subbaraya Sundeep <sbhatta@marvell.com>
L: netdev@vger.kernel.org
@@ -15489,7 +15483,7 @@ S: Supported
F: drivers/perf/marvell_pem_pmu.c
MARVELL PRESTERA ETHERNET SWITCH DRIVER
-M: Taras Chornyi <taras.chornyi@plvision.eu>
+M: Elad Nachman <enachman@marvell.com>
S: Supported
W: https://github.com/Marvell-switching/switchdev-prestera
F: drivers/net/ethernet/marvell/prestera/
@@ -16163,7 +16157,6 @@ F: drivers/dma/mediatek/
MEDIATEK ETHERNET DRIVER
M: Felix Fietkau <nbd@nbd.name>
-M: Sean Wang <sean.wang@mediatek.com>
M: Lorenzo Bianconi <lorenzo@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
@@ -16356,8 +16349,6 @@ F: include/soc/mediatek/smi.h
MEDIATEK SWITCH DRIVER
M: Chester A. Unal <chester.a.unal@arinc9.com>
M: Daniel Golle <daniel@makrotopia.org>
-M: DENG Qingfang <dqfext@gmail.com>
-M: Sean Wang <sean.wang@mediatek.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/dsa/mt7530-mdio.c
@@ -16654,7 +16645,7 @@ M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
-R: Vlastimil Babka <vbabka@suse.cz>
+R: Vlastimil Babka <vbabka@kernel.org>
R: Mike Rapoport <rppt@kernel.org>
R: Suren Baghdasaryan <surenb@google.com>
R: Michal Hocko <mhocko@suse.com>
@@ -16784,7 +16775,7 @@ M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
-R: Vlastimil Babka <vbabka@suse.cz>
+R: Vlastimil Babka <vbabka@kernel.org>
R: Mike Rapoport <rppt@kernel.org>
R: Suren Baghdasaryan <surenb@google.com>
R: Michal Hocko <mhocko@suse.com>
@@ -16839,7 +16830,7 @@ F: mm/oom_kill.c
MEMORY MANAGEMENT - PAGE ALLOCATOR
M: Andrew Morton <akpm@linux-foundation.org>
-M: Vlastimil Babka <vbabka@suse.cz>
+M: Vlastimil Babka <vbabka@kernel.org>
R: Suren Baghdasaryan <surenb@google.com>
R: Michal Hocko <mhocko@suse.com>
R: Brendan Jackman <jackmanb@google.com>
@@ -16885,7 +16876,7 @@ M: David Hildenbrand <david@kernel.org>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Rik van Riel <riel@surriel.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
-R: Vlastimil Babka <vbabka@suse.cz>
+R: Vlastimil Babka <vbabka@kernel.org>
R: Harry Yoo <harry.yoo@oracle.com>
R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
@@ -16984,7 +16975,7 @@ MEMORY MAPPING
M: Andrew Morton <akpm@linux-foundation.org>
M: Liam R. Howlett <Liam.Howlett@oracle.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
-R: Vlastimil Babka <vbabka@suse.cz>
+R: Vlastimil Babka <vbabka@kernel.org>
R: Jann Horn <jannh@google.com>
R: Pedro Falcato <pfalcato@suse.de>
L: linux-mm@kvack.org
@@ -17014,7 +17005,7 @@ M: Andrew Morton <akpm@linux-foundation.org>
M: Suren Baghdasaryan <surenb@google.com>
M: Liam R. Howlett <Liam.Howlett@oracle.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
-R: Vlastimil Babka <vbabka@suse.cz>
+R: Vlastimil Babka <vbabka@kernel.org>
R: Shakeel Butt <shakeel.butt@linux.dev>
L: linux-mm@kvack.org
S: Maintained
@@ -17030,7 +17021,7 @@ M: Andrew Morton <akpm@linux-foundation.org>
M: Liam R. Howlett <Liam.Howlett@oracle.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
M: David Hildenbrand <david@kernel.org>
-R: Vlastimil Babka <vbabka@suse.cz>
+R: Vlastimil Babka <vbabka@kernel.org>
R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
S: Maintained
@@ -19225,8 +19216,6 @@ F: tools/objtool/
OCELOT ETHERNET SWITCH DRIVER
M: Vladimir Oltean <vladimir.oltean@nxp.com>
-M: Claudiu Manoil <claudiu.manoil@nxp.com>
-M: Alexandre Belloni <alexandre.belloni@bootlin.com>
M: UNGLinuxDriver@microchip.com
L: netdev@vger.kernel.org
S: Supported
@@ -19812,7 +19801,6 @@ F: arch/*/boot/dts/
F: include/dt-bindings/
OPENCOMPUTE PTP CLOCK DRIVER
-M: Jonathan Lemon <jonathan.lemon@gmail.com>
M: Vadim Fedorenko <vadim.fedorenko@linux.dev>
L: netdev@vger.kernel.org
S: Maintained
@@ -20507,7 +20495,7 @@ F: Documentation/devicetree/bindings/pci/hisilicon,kirin-pcie.yaml
F: drivers/pci/controller/dwc/pcie-kirin.c
PCIE DRIVER FOR HISILICON STB
-M: Shawn Guo <shawn.guo@linaro.org>
+M: Shawn Guo <shawnguo@kernel.org>
L: linux-pci@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt
@@ -21456,9 +21444,8 @@ S: Supported
F: drivers/scsi/qedi/
QLOGIC QL4xxx ETHERNET DRIVER
-M: Manish Chopra <manishc@marvell.com>
L: netdev@vger.kernel.org
-S: Maintained
+S: Orphan
F: drivers/net/ethernet/qlogic/qed/
F: drivers/net/ethernet/qlogic/qede/
F: include/linux/qed/
@@ -23172,7 +23159,7 @@ K: \b(?i:rust)\b
RUST [ALLOC]
M: Danilo Krummrich <dakr@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
-R: Vlastimil Babka <vbabka@suse.cz>
+R: Vlastimil Babka <vbabka@kernel.org>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Uladzislau Rezki <urezki@gmail.com>
L: rust-for-linux@vger.kernel.org
@@ -24348,7 +24335,7 @@ F: Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml
F: drivers/nvmem/layouts/sl28vpd.c
SLAB ALLOCATOR
-M: Vlastimil Babka <vbabka@suse.cz>
+M: Vlastimil Babka <vbabka@kernel.org>
M: Andrew Morton <akpm@linux-foundation.org>
R: Christoph Lameter <cl@gentwo.org>
R: David Rientjes <rientjes@google.com>
@@ -26215,7 +26202,7 @@ F: drivers/media/radio/radio-raremono.c
THERMAL
M: Rafael J. Wysocki <rafael@kernel.org>
-M: Daniel Lezcano <daniel.lezcano@linaro.org>
+M: Daniel Lezcano <daniel.lezcano@kernel.org>
R: Zhang Rui <rui.zhang@intel.com>
R: Lukasz Luba <lukasz.luba@arm.com>
L: linux-pm@vger.kernel.org
@@ -26245,7 +26232,7 @@ F: drivers/thermal/amlogic_thermal.c
THERMAL/CPU_COOLING
M: Amit Daniel Kachhap <amit.kachhap@gmail.com>
-M: Daniel Lezcano <daniel.lezcano@linaro.org>
+M: Daniel Lezcano <daniel.lezcano@kernel.org>
M: Viresh Kumar <viresh.kumar@linaro.org>
R: Lukasz Luba <lukasz.luba@arm.com>
L: linux-pm@vger.kernel.org
@@ -29184,7 +29171,7 @@ K: zstd
ZSWAP COMPRESSED SWAP CACHING
M: Johannes Weiner <hannes@cmpxchg.org>
-M: Yosry Ahmed <yosry.ahmed@linux.dev>
+M: Yosry Ahmed <yosry@kernel.org>
M: Nhat Pham <nphamcs@gmail.com>
R: Chengming Zhou <chengming.zhou@linux.dev>
L: linux-mm@kvack.org
diff --git a/Makefile b/Makefile
index e944c6e71e81..2446085983f7 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 7
PATCHLEVEL = 0
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 83e03abbb2ca..8cbd1e96fd50 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -264,19 +264,33 @@ __iowrite64_copy(void __iomem *to, const void *from, size_t count)
typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size,
pgprot_t *prot);
int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
+void __iomem *__ioremap_prot(phys_addr_t phys, size_t size, pgprot_t prot);
-#define ioremap_prot ioremap_prot
+static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size,
+ pgprot_t user_prot)
+{
+ pgprot_t prot;
+ ptdesc_t user_prot_val = pgprot_val(user_prot);
+
+ if (WARN_ON_ONCE(!(user_prot_val & PTE_USER)))
+ return NULL;
-#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
+ prot = __pgprot_modify(PAGE_KERNEL, PTE_ATTRINDX_MASK,
+ user_prot_val & PTE_ATTRINDX_MASK);
+ return __ioremap_prot(phys, size, prot);
+}
+#define ioremap_prot ioremap_prot
+#define ioremap(addr, size) \
+ __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) \
- ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
+ __ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
#define ioremap_np(addr, size) \
- ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
+ __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
#define ioremap_encrypted(addr, size) \
- ioremap_prot((addr), (size), PAGE_KERNEL)
+ __ioremap_prot((addr), (size), PAGE_KERNEL)
/*
* io{read,write}{16,32,64}be() macros
@@ -297,7 +311,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
if (pfn_is_map_memory(__phys_to_pfn(addr)))
return (void __iomem *)__phys_to_virt(addr);
- return ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
+ return __ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
}
/*
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5d5a3bbdb95e..2ca264b3db5f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1616,7 +1616,8 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP))
#define kvm_has_s1poe(k) \
- (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+ (system_supports_poe() && \
+ kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
#define kvm_has_ras(k) \
(kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 905c658057a4..091544e6af44 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -397,6 +397,8 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
+u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime);
+
#define vncr_fixmap(c) \
({ \
u32 __c = (c); \
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index d27e8872fe3c..2b32639160de 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -164,9 +164,6 @@ static inline bool __pure lpa2_is_enabled(void)
#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER)
#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER)
-#define PAGE_GCS __pgprot(_PAGE_GCS)
-#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
-
#define PIE_E0 ( \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index a2d65d7d6aae..1416e652612b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -31,19 +31,11 @@
*/
#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \
"tlbi " #op "\n" \
- ALTERNATIVE("nop\n nop", \
- "dsb ish\n tlbi " #op, \
- ARM64_WORKAROUND_REPEAT_TLBI, \
- CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : )
#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \
- "tlbi " #op ", %0\n" \
- ALTERNATIVE("nop\n nop", \
- "dsb ish\n tlbi " #op ", %0", \
- ARM64_WORKAROUND_REPEAT_TLBI, \
- CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
- : : "r" (arg))
+ "tlbi " #op ", %x0\n" \
+ : : "rZ" (arg))
#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
@@ -181,6 +173,34 @@ static inline unsigned long get_trans_granule(void)
(__pages >> (5 * (scale) + 1)) - 1; \
})
+#define __repeat_tlbi_sync(op, arg...) \
+do { \
+ if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) \
+ break; \
+ __tlbi(op, ##arg); \
+ dsb(ish); \
+} while (0)
+
+/*
+ * Complete broadcast TLB maintenance issued by the host which invalidates
+ * stage 1 information in the host's own translation regime.
+ */
+static inline void __tlbi_sync_s1ish(void)
+{
+ dsb(ish);
+ __repeat_tlbi_sync(vale1is, 0);
+}
+
+/*
+ * Complete broadcast TLB maintenance issued by hyp code which invalidates
+ * stage 1 translation information in any translation regime.
+ */
+static inline void __tlbi_sync_s1ish_hyp(void)
+{
+ dsb(ish);
+ __repeat_tlbi_sync(vale2is, 0);
+}
+
/*
* TLB Invalidation
* ================
@@ -279,7 +299,7 @@ static inline void flush_tlb_all(void)
{
dsb(ishst);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish();
isb();
}
@@ -291,7 +311,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
asid = __TLBI_VADDR(0, ASID(mm));
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
- dsb(ish);
+ __tlbi_sync_s1ish();
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
@@ -345,20 +365,11 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
flush_tlb_page_nosync(vma, uaddr);
- dsb(ish);
+ __tlbi_sync_s1ish();
}
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
- /*
- * TLB flush deferral is not required on systems which are affected by
- * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
- * will have two consecutive TLBI instructions with a dsb(ish) in between
- * defeating the purpose (i.e save overall 'dsb ish' cost).
- */
- if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
- return false;
-
return true;
}
@@ -374,7 +385,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
*/
static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
- dsb(ish);
+ __tlbi_sync_s1ish();
}
/*
@@ -509,7 +520,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
{
__flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
last_level, tlb_level);
- dsb(ish);
+ __tlbi_sync_s1ish();
}
static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
@@ -557,7 +568,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
dsb(ishst);
__flush_tlb_range_op(vaale1is, start, pages, stride, 0,
TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
- dsb(ish);
+ __tlbi_sync_s1ish();
isb();
}
@@ -571,7 +582,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
dsb(ishst);
__tlbi(vaae1is, addr);
- dsb(ish);
+ __tlbi_sync_s1ish();
isb();
}
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index af90128cfed5..a9d884fd1d00 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -377,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
prot = __acpi_get_writethrough_mem_attribute();
}
}
- return ioremap_prot(phys, size, prot);
+ return __ioremap_prot(phys, size, prot);
}
/*
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 4a609e9b65de..b9d4998c97ef 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
* We pick the reserved-ASID to minimise the impact.
*/
__tlbi(aside1is, __TLBI_VADDR(0, 0));
- dsb(ish);
+ __tlbi_sync_s1ish();
}
ret = caches_clean_inval_user_pou(start, start + chunk);
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 3fe1faab0362..b32f13358fbb 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -400,16 +400,25 @@ static inline
int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
{
/*
- * Abort call on counterless CPU or when interrupts are
- * disabled - can lead to deadlock in smp sync call.
+ * Abort call on counterless CPU.
*/
if (!cpu_has_amu_feat(cpu))
return -EOPNOTSUPP;
- if (WARN_ON_ONCE(irqs_disabled()))
- return -EPERM;
-
- smp_call_function_single(cpu, func, val, 1);
+ if (irqs_disabled()) {
+ /*
+ * When IRQs are disabled (tick path: sched_tick ->
+ * topology_scale_freq_tick or cppc_scale_freq_tick), only local
+ * CPU counter reads are allowed. Remote CPU counter read would
+ * require smp_call_function_single() which is unsafe with IRQs
+ * disabled.
+ */
+ if (WARN_ON_ONCE(cpu != smp_processor_id()))
+ return -EPERM;
+ func(val);
+ } else {
+ smp_call_function_single(cpu, func, val, 1);
+ }
return 0;
}
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 4f803fd1c99a..7d1f22fd490b 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,7 +21,6 @@ menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
select KVM_COMMON
select KVM_GENERIC_HARDWARE_ENABLING
- select KVM_GENERIC_MMU_NOTIFIER
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 29f0326f7e00..410ffd41fd73 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -358,7 +358,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_IOEVENTFD:
case KVM_CAP_USER_MEMORY:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 885bd5bb2f41..6588ea251ed7 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -540,31 +540,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
- if (wr->nG) {
- u64 asid_ttbr, tcr;
-
- switch (wi->regime) {
- case TR_EL10:
- tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
- asid_ttbr = ((tcr & TCR_A1) ?
- vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
- vcpu_read_sys_reg(vcpu, TTBR0_EL1));
- break;
- case TR_EL20:
- tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
- asid_ttbr = ((tcr & TCR_A1) ?
- vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
- vcpu_read_sys_reg(vcpu, TTBR0_EL2));
- break;
- default:
- BUG();
- }
-
- wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
- if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
- !(tcr & TCR_ASID16))
- wr->asid &= GENMASK(7, 0);
- }
+ if (wr->nG)
+ wr->asid = get_asid_by_regime(vcpu, wi->regime);
return 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index ae8391baebc3..218976287d3f 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -271,7 +271,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
*/
dsb(ishst);
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
}
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 8e29d7734a15..2f029bfe4755 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -342,6 +342,7 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
/* No restrictions for non-protected VMs. */
if (!kvm_vm_is_protected(kvm)) {
hyp_vm->kvm.arch.flags = host_arch_flags;
+ hyp_vm->kvm.arch.flags &= ~BIT_ULL(KVM_ARCH_FLAG_ID_REGS_INITIALIZED);
bitmap_copy(kvm->arch.vcpu_features,
host_kvm->arch.vcpu_features,
@@ -391,7 +392,7 @@ static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
return;
- sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
+ sve_state = hyp_vcpu->vcpu.arch.sve_state;
hyp_unpin_shared_mem(sve_state,
sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
}
@@ -471,6 +472,35 @@ err:
return ret;
}
+static int vm_copy_id_regs(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
+ const struct kvm *host_kvm = hyp_vm->host_kvm;
+ struct kvm *kvm = &hyp_vm->kvm;
+
+ if (!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &host_kvm->arch.flags))
+ return -EINVAL;
+
+ if (test_and_set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
+ return 0;
+
+ memcpy(kvm->arch.id_regs, host_kvm->arch.id_regs, sizeof(kvm->arch.id_regs));
+
+ return 0;
+}
+
+static int pkvm_vcpu_init_sysregs(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ int ret = 0;
+
+ if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
+ else
+ ret = vm_copy_id_regs(hyp_vcpu);
+
+ return ret;
+}
+
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
struct pkvm_hyp_vm *hyp_vm,
struct kvm_vcpu *host_vcpu)
@@ -490,8 +520,9 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
- if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
- kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
+ ret = pkvm_vcpu_init_sysregs(hyp_vcpu);
+ if (ret)
+ goto done;
ret = pkvm_vcpu_init_traps(hyp_vcpu);
if (ret)
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 48da9ca9763f..3dc1ce0d27fe 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
*/
dsb(ish);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -226,7 +226,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -240,7 +240,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -266,5 +266,5 @@ void __kvm_flush_vm_context(void)
/* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 0e4ddd28ef5d..9b480f947da2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -501,7 +501,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
*unmapped += granule;
}
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
mm_ops->put_page(ctx->ptep);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index ec2569818629..35855dadfb1b 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -115,7 +115,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
*/
dsb(ish);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -176,7 +176,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -192,7 +192,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -217,7 +217,7 @@ void __kvm_flush_vm_context(void)
{
dsb(ishst);
__tlbi(alle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
}
/*
@@ -358,7 +358,7 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding)
default:
ret = -EINVAL;
}
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
if (mmu)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 070a01e53fcb..ec2eee857208 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1754,14 +1754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
/*
- * Both the canonical IPA and fault IPA must be hugepage-aligned to
- * ensure we find the right PFN and lay down the mapping in the right
- * place.
+ * Both the canonical IPA and fault IPA must be aligned to the
+ * mapping size to ensure we find the right PFN and lay down the
+ * mapping in the right place.
*/
- if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) {
- fault_ipa &= ~(vma_pagesize - 1);
- ipa &= ~(vma_pagesize - 1);
- }
+ fault_ipa = ALIGN_DOWN(fault_ipa, vma_pagesize);
+ ipa = ALIGN_DOWN(ipa, vma_pagesize);
gfn = ipa >> PAGE_SHIFT;
mte_allowed = kvm_vma_mte_allowed(vma);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 620126d1f0dc..12c9f6e8dfda 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -854,6 +854,33 @@ int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
return kvm_inject_nested_sync(vcpu, esr_el2);
}
+u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime)
+{
+ enum vcpu_sysreg ttbr_elx;
+ u64 tcr;
+ u16 asid;
+
+ switch (regime) {
+ case TR_EL10:
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
+ ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL1 : TTBR0_EL1;
+ break;
+ case TR_EL20:
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL2 : TTBR0_EL2;
+ break;
+ default:
+ BUG();
+ }
+
+ asid = FIELD_GET(TTBRx_EL1_ASID, vcpu_read_sys_reg(vcpu, ttbr_elx));
+ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
+ !(tcr & TCR_ASID16))
+ asid &= GENMASK(7, 0);
+
+ return asid;
+}
+
static void invalidate_vncr(struct vncr_tlb *vt)
{
vt->valid = false;
@@ -1154,9 +1181,6 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
int i;
- if (!kvm->arch.nested_mmus_size)
- return;
-
for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
@@ -1336,20 +1360,8 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu)
if (read_vncr_el2(vcpu) != vt->gva)
return false;
- if (vt->wr.nG) {
- u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
- u64 ttbr = ((tcr & TCR_A1) ?
- vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
- vcpu_read_sys_reg(vcpu, TTBR0_EL2));
- u16 asid;
-
- asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
- if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
- !(tcr & TCR_ASID16))
- asid &= GENMASK(7, 0);
-
- return asid == vt->wr.asid;
- }
+ if (vt->wr.nG)
+ return get_asid_by_regime(vcpu, TR_EL20) == vt->wr.asid;
return true;
}
@@ -1452,21 +1464,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
if (read_vncr_el2(vcpu) != vt->gva)
return;
- if (vt->wr.nG) {
- u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
- u64 ttbr = ((tcr & TCR_A1) ?
- vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
- vcpu_read_sys_reg(vcpu, TTBR0_EL2));
- u16 asid;
-
- asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
- if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
- !(tcr & TCR_ASID16))
- asid &= GENMASK(7, 0);
-
- if (asid != vt->wr.asid)
- return;
- }
+ if (vt->wr.nG && get_asid_by_regime(vcpu, TR_EL20) != vt->wr.asid)
+ return;
vt->cpu = smp_processor_id();
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a7cd0badc20c..1b4cacb6e918 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1816,6 +1816,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
ID_AA64MMFR3_EL1_SCTLRX |
ID_AA64MMFR3_EL1_S1POE |
ID_AA64MMFR3_EL1_S1PIE;
+
+ if (!system_supports_poe())
+ val &= ~ID_AA64MMFR3_EL1_S1POE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ID_MMFR4_EL1_CCIDX;
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index d02341303899..e278e060e78a 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -32,7 +32,11 @@ static inline unsigned long xloops_to_cycles(unsigned long xloops)
* Note that userspace cannot change the offset behind our back either,
* as the vcpu mutex is held as long as KVM_RUN is in progress.
*/
-#define __delay_cycles() __arch_counter_get_cntvct_stable()
+static cycles_t notrace __delay_cycles(void)
+{
+ guard(preempt_notrace)();
+ return __arch_counter_get_cntvct_stable();
+}
void __delay(unsigned long cycles)
{
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index b12cbed9b5ad..6eef48ef6278 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -14,8 +14,8 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook)
return 0;
}
-void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
- pgprot_t pgprot)
+void __iomem *__ioremap_prot(phys_addr_t phys_addr, size_t size,
+ pgprot_t pgprot)
{
unsigned long last_addr = phys_addr + size - 1;
@@ -39,7 +39,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
return generic_ioremap_prot(phys_addr, size, pgprot);
}
-EXPORT_SYMBOL(ioremap_prot);
+EXPORT_SYMBOL(__ioremap_prot);
/*
* Must be called after early_fixmap_init
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 08ee177432c2..92b2f5097a96 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -34,6 +34,8 @@ static pgprot_t protection_map[16] __ro_after_init = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC
};
+static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO;
+
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
* away in the future.
@@ -73,9 +75,11 @@ static int __init adjust_protection_map(void)
protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
}
- if (lpa2_is_enabled())
+ if (lpa2_is_enabled()) {
for (int i = 0; i < ARRAY_SIZE(protection_map); i++)
pgprot_val(protection_map[i]) &= ~PTE_SHARED;
+ gcs_page_prot &= ~PTE_SHARED;
+ }
return 0;
}
@@ -87,7 +91,11 @@ pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
/* Short circuit GCS to avoid bloating the table. */
if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
- prot = _PAGE_GCS_RO;
+ /* Honour mprotect(PROT_NONE) on shadow stack mappings */
+ if (vm_flags & VM_ACCESS_FLAGS)
+ prot = gcs_page_prot;
+ else
+ prot = pgprot_val(protection_map[VM_NONE]);
} else {
prot = pgprot_val(protection_map[vm_flags &
(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 356d33c7a4ae..adf84962d579 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2119,7 +2119,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
image_size = extable_offset + extable_size;
ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
- sizeof(u32), &header, &image_ptr,
+ sizeof(u64), &header, &image_ptr,
jit_fill_hole);
if (!ro_header) {
prog = orig_prog;
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index ed4f724db774..8e5213609975 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -28,7 +28,6 @@ config KVM
select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_MMIO
select VIRT_XFER_TO_GUEST_WORK
select SCHED_INFO
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 63fd40530aa9..5282158b8122 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -118,7 +118,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_READONLY_MEM:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_MP_STATE:
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index cc13cc35f208..b1b9a1d67758 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -23,7 +23,6 @@ config KVM
select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_MMIO
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_GENERIC_HARDWARE_ENABLING
select HAVE_KVM_READONLY_MEM
help
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index b0fb92fda4d4..29d9f630edfb 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1035,7 +1035,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_READONLY_MEM:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index c9a2d50ff1b0..9a0d1c1aca6c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -38,7 +38,6 @@ config KVM_BOOK3S_64_HANDLER
config KVM_BOOK3S_PR_POSSIBLE
bool
select KVM_MMIO
- select KVM_GENERIC_MMU_NOTIFIER
config KVM_BOOK3S_HV_POSSIBLE
bool
@@ -81,7 +80,6 @@ config KVM_BOOK3S_64_HV
tristate "KVM for POWER7 and later using hypervisor mode in host"
depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_BOOK3S_HV_PMU
select CMA
help
@@ -203,7 +201,6 @@ config KVM_E500V2
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
- select KVM_GENERIC_MMU_NOTIFIER
help
Support running unmodified E500 guest kernels in virtual machines on
E500v2 host processors.
@@ -220,7 +217,6 @@ config KVM_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
- select KVM_GENERIC_MMU_NOTIFIER
help
Support running unmodified E500MC/E5500/E6500 guest kernels in
virtual machines on E500MC/E5500/E6500 host processors.
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9a89a6d98f97..00302399fc37 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -623,12 +623,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
!kvmppc_hv_ops->enable_nested(NULL));
break;
-#endif
- case KVM_CAP_SYNC_MMU:
- BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER));
- r = 1;
- break;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_HTAB_FD:
r = hv_enabled;
break;
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 77379f77840a..ec2cee0a39e0 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -30,7 +30,6 @@ config KVM
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_MMIO
select VIRT_XFER_TO_GUEST_WORK
- select KVM_GENERIC_MMU_NOTIFIER
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
help
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 7cbd2340c190..58bce57dc55b 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -181,7 +181,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_IOEVENTFD:
case KVM_CAP_USER_MEMORY:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_READONLY_MEM:
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 09f763b9eb40..32536ee34aa0 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -19,9 +19,9 @@ struct s390_idle_data {
unsigned long mt_cycles_enter[8];
};
+DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
+
extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;
-void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
-
#endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
index 9d25fb35a042..b1db75d14e9d 100644
--- a/arch/s390/include/asm/vtime.h
+++ b/arch/s390/include/asm/vtime.h
@@ -2,6 +2,12 @@
#ifndef _S390_VTIME_H
#define _S390_VTIME_H
+#include <asm/lowcore.h>
+#include <asm/cpu_mf.h>
+#include <asm/idle.h>
+
+DECLARE_PER_CPU(u64, mt_cycles[8]);
+
static inline void update_timer_sys(void)
{
struct lowcore *lc = get_lowcore();
@@ -20,4 +26,32 @@ static inline void update_timer_mcck(void)
lc->last_update_timer = lc->mcck_enter_timer;
}
+static inline void update_timer_idle(void)
+{
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ struct lowcore *lc = get_lowcore();
+ u64 cycles_new[8];
+ int i, mtid;
+
+ mtid = smp_cpu_mtid;
+ if (mtid) {
+ stcctm(MT_DIAG, mtid, cycles_new);
+ for (i = 0; i < mtid; i++)
+ __this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
+ }
+ /*
+ * This is a bit subtle: Forward last_update_clock so it excludes idle
+ * time. For correct steal time calculation in do_account_vtime() add
+ * passed wall time before idle_enter to steal_timer:
+ * During the passed wall time before idle_enter CPU time may have
+ * been accounted to system, hardirq, softirq, etc. lowcore fields.
+ * The accounted CPU times will be subtracted again from steal_timer
+ * when accumulated steal time is calculated in do_account_vtime().
+ */
+ lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
+ lc->last_update_clock = lc->int_clock;
+ lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
+ lc->last_update_timer = lc->sys_enter_timer;
+}
+
#endif /* _S390_VTIME_H */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index dd55cc6bbc28..fb67b4abe68c 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -56,8 +56,6 @@ long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags);
-DECLARE_PER_CPU(u64, mt_cycles[8]);
-
unsigned long stack_alloc(void);
void stack_free(unsigned long stack);
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 39cb8d0ae348..1f1b06b6b4ef 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -15,37 +15,22 @@
#include <trace/events/power.h>
#include <asm/cpu_mf.h>
#include <asm/cputime.h>
+#include <asm/idle.h>
#include <asm/nmi.h>
#include <asm/smp.h>
-#include "entry.h"
-static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void account_idle_time_irq(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
- struct lowcore *lc = get_lowcore();
unsigned long idle_time;
- u64 cycles_new[8];
- int i;
- if (smp_cpu_mtid) {
- stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
- for (i = 0; i < smp_cpu_mtid; i++)
- this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
- }
-
- idle_time = lc->int_clock - idle->clock_idle_enter;
-
- lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
- lc->last_update_clock = lc->int_clock;
-
- lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
- lc->last_update_timer = lc->sys_enter_timer;
+ idle_time = get_lowcore()->int_clock - idle->clock_idle_enter;
/* Account time spent with enabled wait psw loaded as idle time. */
- WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
- WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+ __atomic64_add(idle_time, &idle->idle_time);
+ __atomic64_add_const(1, &idle->idle_count);
account_idle_time(cputime_to_nsecs(idle_time));
}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index dcdc7e274848..049c557c452f 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2377,7 +2377,7 @@ void __init setup_ipl(void)
atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
}
-void s390_reset_system(void)
+void __no_stack_protector s390_reset_system(void)
{
/* Disable prefixing */
set_prefix(0);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index f81723bc8856..7fdf960191d3 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -146,6 +146,12 @@ void noinstr do_io_irq(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
bool from_idle;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+ if (from_idle) {
+ update_timer_idle();
+ regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+ }
+
irq_enter_rcu();
if (user_mode(regs)) {
@@ -154,7 +160,6 @@ void noinstr do_io_irq(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}
- from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
@@ -171,9 +176,6 @@ void noinstr do_io_irq(struct pt_regs *regs)
set_irq_regs(old_regs);
irqentry_exit(regs, state);
-
- if (from_idle)
- regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
}
void noinstr do_ext_irq(struct pt_regs *regs)
@@ -182,6 +184,12 @@ void noinstr do_ext_irq(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
bool from_idle;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+ if (from_idle) {
+ update_timer_idle();
+ regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+ }
+
irq_enter_rcu();
if (user_mode(regs)) {
@@ -194,7 +202,6 @@ void noinstr do_ext_irq(struct pt_regs *regs)
regs->int_parm = get_lowcore()->ext_params;
regs->int_parm_long = get_lowcore()->ext_params2;
- from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
@@ -203,9 +210,6 @@ void noinstr do_ext_irq(struct pt_regs *regs)
irq_exit_rcu();
set_irq_regs(old_regs);
irqentry_exit(regs, state);
-
- if (from_idle)
- regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
}
static void show_msi_interrupt(struct seq_file *p, int irq)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 234a0ba30510..bf48744d0912 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -48,8 +48,7 @@ static inline void set_vtimer(u64 expires)
static inline int virt_timer_forward(u64 elapsed)
{
- BUG_ON(!irqs_disabled());
-
+ lockdep_assert_irqs_disabled();
if (list_empty(&virt_timer_list))
return 0;
elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
@@ -137,23 +136,16 @@ static int do_account_vtime(struct task_struct *tsk)
lc->system_timer += timer;
/* Update MT utilization calculation */
- if (smp_cpu_mtid &&
- time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+ if (smp_cpu_mtid && time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies)))
update_mt_scaling();
/* Calculate cputime delta */
- user = update_tsk_timer(&tsk->thread.user_timer,
- READ_ONCE(lc->user_timer));
- guest = update_tsk_timer(&tsk->thread.guest_timer,
- READ_ONCE(lc->guest_timer));
- system = update_tsk_timer(&tsk->thread.system_timer,
- READ_ONCE(lc->system_timer));
- hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
- READ_ONCE(lc->hardirq_timer));
- softirq = update_tsk_timer(&tsk->thread.softirq_timer,
- READ_ONCE(lc->softirq_timer));
- lc->steal_timer +=
- clock - user - guest - system - hardirq - softirq;
+ user = update_tsk_timer(&tsk->thread.user_timer, lc->user_timer);
+ guest = update_tsk_timer(&tsk->thread.guest_timer, lc->guest_timer);
+ system = update_tsk_timer(&tsk->thread.system_timer, lc->system_timer);
+ hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, lc->hardirq_timer);
+ softirq = update_tsk_timer(&tsk->thread.softirq_timer, lc->softirq_timer);
+ lc->steal_timer += clock - user - guest - system - hardirq - softirq;
/* Push account value */
if (user) {
@@ -225,10 +217,6 @@ static u64 vtime_delta(void)
return timer - lc->last_update_timer;
}
-/*
- * Update process times based on virtual cpu times stored by entry.S
- * to the lowcore fields user_timer, system_timer & steal_clock.
- */
void vtime_account_kernel(struct task_struct *tsk)
{
struct lowcore *lc = get_lowcore();
@@ -238,27 +226,17 @@ void vtime_account_kernel(struct task_struct *tsk)
lc->guest_timer += delta;
else
lc->system_timer += delta;
-
- virt_timer_forward(delta);
}
EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_softirq(struct task_struct *tsk)
{
- u64 delta = vtime_delta();
-
- get_lowcore()->softirq_timer += delta;
-
- virt_timer_forward(delta);
+ get_lowcore()->softirq_timer += vtime_delta();
}
void vtime_account_hardirq(struct task_struct *tsk)
{
- u64 delta = vtime_delta();
-
- get_lowcore()->hardirq_timer += delta;
-
- virt_timer_forward(delta);
+ get_lowcore()->hardirq_timer += vtime_delta();
}
/*
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 917ac740513e..5b835bc6a194 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -28,9 +28,7 @@ config KVM
select HAVE_KVM_INVALID_WAKEUPS
select HAVE_KVM_NO_POLL
select KVM_VFIO
- select MMU_NOTIFIER
select VIRT_XFER_TO_GUEST_WORK
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_MMU_LOCKLESS_AGING
help
Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 7a175d86cef0..bc7d6fa66eaf 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -601,7 +601,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
switch (ext) {
case KVM_CAP_S390_PSW:
case KVM_CAP_S390_GMAP:
- case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
#endif
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 2f829448c719..6ecd6b0a22a8 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -62,7 +62,7 @@ int __pfault_init(void)
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
: [rc] "+d" (rc)
- : [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk)
+ : [refbk] "a" (virt_to_phys(&pfault_init_refbk)), "m" (pfault_init_refbk)
: "cc");
return rc;
}
@@ -84,7 +84,7 @@ void __pfault_fini(void)
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
:
- : [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk)
+ : [refbk] "a" (virt_to_phys(&pfault_fini_refbk)), "m" (pfault_fini_refbk)
: "cc");
}
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 46ef88bc9c26..7613ab0ffb89 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -312,6 +312,8 @@ static dma_addr_t dma_4u_map_phys(struct device *dev, phys_addr_t phys,
if (direction != DMA_TO_DEVICE)
iopte_protection |= IOPTE_WRITE;
+ phys &= IO_PAGE_MASK;
+
for (i = 0; i < npages; i++, base++, phys += IO_PAGE_SIZE)
iopte_val(*base) = iopte_protection | phys;
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 440284cc804e..61f14b4c8f90 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -410,6 +410,8 @@ static dma_addr_t dma_4v_map_phys(struct device *dev, phys_addr_t phys,
iommu_batch_start(dev, prot, entry);
+ phys &= IO_PAGE_MASK;
+
for (i = 0; i < npages; i++, phys += IO_PAGE_SIZE) {
long err = iommu_batch_add(phys, mask);
if (unlikely(err < 0L))
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 012b2bcaa8a0..20fc33300a95 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -69,11 +69,11 @@ struct io_thread_req {
};
-static struct io_thread_req * (*irq_req_buffer)[];
+static struct io_thread_req **irq_req_buffer;
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;
-static struct io_thread_req * (*io_req_buffer)[];
+static struct io_thread_req **io_req_buffer;
static struct io_thread_req *io_remainder;
static int io_remainder_size;
@@ -398,7 +398,7 @@ static int thread_fd = -1;
static int bulk_req_safe_read(
int fd,
- struct io_thread_req * (*request_buffer)[],
+ struct io_thread_req **request_buffer,
struct io_thread_req **remainder,
int *remainder_size,
int max_recs
@@ -465,7 +465,7 @@ static irqreturn_t ubd_intr(int irq, void *dev)
&irq_remainder, &irq_remainder_size,
UBD_REQ_BUFFER_SIZE)) >= 0) {
for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
- ubd_end_request((*irq_req_buffer)[i]);
+ ubd_end_request(irq_req_buffer[i]);
}
if (len < 0 && len != -EAGAIN)
@@ -1512,7 +1512,7 @@ void *io_thread(void *arg)
}
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
- struct io_thread_req *req = (*io_req_buffer)[count];
+ struct io_thread_req *req = io_req_buffer[count];
int i;
io_count++;
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index a9b72997103d..88c757ac8ccd 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -160,8 +160,6 @@ void __init fred_complete_exception_setup(void)
static noinstr void fred_extint(struct pt_regs *regs)
{
unsigned int vector = regs->fred_ss.vector;
- unsigned int index = array_index_nospec(vector - FIRST_SYSTEM_VECTOR,
- NR_SYSTEM_VECTORS);
if (WARN_ON_ONCE(vector < FIRST_EXTERNAL_VECTOR))
return;
@@ -170,7 +168,8 @@ static noinstr void fred_extint(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
instrumentation_begin();
- sysvec_table[index](regs);
+ sysvec_table[array_index_nospec(vector - FIRST_SYSTEM_VECTOR,
+ NR_SYSTEM_VECTORS)](regs);
instrumentation_end();
irqentry_exit(regs, state);
} else {
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 5ed6e0b7e715..0a1d08136cc1 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -6497,6 +6497,32 @@ static struct intel_uncore_type gnr_uncore_ubox = {
.attr_update = uncore_alias_groups,
};
+static struct uncore_event_desc gnr_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x01,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0, "event=0x05,umask=0xcf"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1, "event=0x06,umask=0xcf"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0, "event=0x05,umask=0xf0"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1, "event=0x06,umask=0xf0"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type gnr_uncore_imc = {
+ SPR_UNCORE_MMIO_COMMON_FORMAT(),
+ .name = "imc",
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
+ .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
+ .event_descs = gnr_uncore_imc_events,
+};
+
static struct intel_uncore_type gnr_uncore_pciex8 = {
SPR_UNCORE_PCI_COMMON_FORMAT(),
.name = "pciex8",
@@ -6544,7 +6570,7 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
NULL,
&spr_uncore_pcu,
&gnr_uncore_ubox,
- &spr_uncore_imc,
+ &gnr_uncore_imc,
NULL,
&gnr_uncore_upi,
NULL,
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 9b4e04690e1a..80c1696d8d59 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -7,7 +7,7 @@
#include <linux/objtool.h>
#include <asm/asm.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct bug_entry;
extern void __WARN_trap(struct bug_entry *bug, ...);
#endif
@@ -137,7 +137,7 @@ do { \
#ifdef HAVE_ARCH_BUG_FORMAT_ARGS
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/static_call_types.h>
DECLARE_STATIC_CALL(WARN_trap, __WARN_trap);
@@ -153,7 +153,7 @@ struct arch_va_list {
struct sysv_va_list args;
};
extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define __WARN_bug_entry(flags, format) ({ \
struct bug_entry *bug; \
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index c40b9ebc1fb4..ab3fbbd947ed 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -111,6 +111,12 @@ extern bhi_thunk __bhi_args_end[];
struct pt_regs;
+#ifdef CONFIG_CALL_PADDING
+#define CFI_OFFSET (CONFIG_FUNCTION_PADDING_CFI+5)
+#else
+#define CFI_OFFSET 5
+#endif
+
#ifdef CONFIG_CFI
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
#define __bpfcall
@@ -119,11 +125,9 @@ static inline int cfi_get_offset(void)
{
switch (cfi_mode) {
case CFI_FINEIBT:
- return 16;
+ return /* fineibt_prefix_size */ 16;
case CFI_KCFI:
- if (IS_ENABLED(CONFIG_CALL_PADDING))
- return 16;
- return 5;
+ return CFI_OFFSET;
default:
return 0;
}
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index a1193e9d65f2..462754b0bf8a 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -77,7 +77,7 @@ static __always_inline void native_local_irq_restore(unsigned long flags)
#endif
#ifndef CONFIG_PARAVIRT
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Used in the idle loop; sti takes one instruction cycle
* to complete:
@@ -95,7 +95,7 @@ static __always_inline void halt(void)
{
native_halt();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_PARAVIRT */
#ifdef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 9d38ae744a2e..a7294656ad90 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -68,7 +68,7 @@
* Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
* CFI symbol layout changes.
*
- * Without CALL_THUNKS:
+ * Without CALL_PADDING:
*
* .align FUNCTION_ALIGNMENT
* __cfi_##name:
@@ -77,7 +77,7 @@
* .long __kcfi_typeid_##name
* name:
*
- * With CALL_THUNKS:
+ * With CALL_PADDING:
*
* .align FUNCTION_ALIGNMENT
* __cfi_##name:
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index c55058f3d75e..409981468cba 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -20,7 +20,7 @@
#define PER_CPU_VAR(var) __percpu(var)__percpu_rel
-#else /* !__ASSEMBLY__: */
+#else /* !__ASSEMBLER__: */
#include <linux/args.h>
#include <linux/bits.h>
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
index e5a13dc8816e..4cd94fdcb45e 100644
--- a/arch/x86/include/asm/runtime-const.h
+++ b/arch/x86/include/asm/runtime-const.h
@@ -6,7 +6,7 @@
#error "Cannot use runtime-const infrastructure from modules"
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro RUNTIME_CONST_PTR sym reg
movq $0x0123456789abcdef, %\reg
@@ -16,7 +16,7 @@
.popsection
.endm
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define runtime_const_ptr(sym) ({ \
typeof(sym) __ret; \
@@ -74,5 +74,5 @@ static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
}
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 869b88061801..3f24cc472ce9 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -25,6 +25,8 @@ extern int ibt_selftest_noendbr(void);
void handle_invalid_op(struct pt_regs *regs);
#endif
+noinstr bool handle_bug(struct pt_regs *regs);
+
static inline int get_si_code(unsigned long condition)
{
if (condition & DR_STEP)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a888ae0f01fb..e87da25d1236 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1182,7 +1182,7 @@ void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
poison_endbr(addr);
if (IS_ENABLED(CONFIG_FINEIBT))
- poison_cfi(addr - 16);
+ poison_cfi(addr - CFI_OFFSET);
}
}
@@ -1389,6 +1389,8 @@ extern u8 fineibt_preamble_end[];
#define fineibt_preamble_ud 0x13
#define fineibt_preamble_hash 5
+#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE)
+
/*
* <fineibt_caller_start>:
* 0: b8 78 56 34 12 mov $0x12345678, %eax
@@ -1634,7 +1636,7 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
* have determined there are no indirect calls to it and we
* don't need no CFI either.
*/
- if (!is_endbr(addr + 16))
+ if (!is_endbr(addr + CFI_OFFSET))
continue;
hash = decode_preamble_hash(addr, &arity);
@@ -1642,6 +1644,15 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
addr, addr, 5, addr))
return -EINVAL;
+ /*
+ * FineIBT relies on being at func-16, so if the preamble is
+ * actually larger than that, place it the tail end.
+ *
+ * NOTE: this is possible with things like DEBUG_CALL_THUNKS
+ * and DEBUG_FORCE_FUNCTION_ALIGN_64B.
+ */
+ addr += CFI_OFFSET - fineibt_prefix_size;
+
text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
@@ -1664,10 +1675,10 @@ static void cfi_rewrite_endbr(s32 *start, s32 *end)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- if (!exact_endbr(addr + 16))
+ if (!exact_endbr(addr + CFI_OFFSET))
continue;
- poison_endbr(addr + 16);
+ poison_endbr(addr + CFI_OFFSET);
}
}
@@ -1772,7 +1783,8 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
if (FINEIBT_WARN(fineibt_preamble_size, 20) ||
FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) ||
FINEIBT_WARN(fineibt_caller_size, 14) ||
- FINEIBT_WARN(fineibt_paranoid_size, 20))
+ FINEIBT_WARN(fineibt_paranoid_size, 20) ||
+ WARN_ON_ONCE(CFI_OFFSET < fineibt_prefix_size))
return;
if (cfi_mode == CFI_AUTO) {
@@ -1886,6 +1898,11 @@ static void poison_cfi(void *addr)
switch (cfi_mode) {
case CFI_FINEIBT:
/*
+ * FineIBT preamble is at func-16.
+ */
+ addr += CFI_OFFSET - fineibt_prefix_size;
+
+ /*
* FineIBT prefix should start with an ENDBR.
*/
if (!is_endbr(addr))
@@ -1923,8 +1940,6 @@ static void poison_cfi(void *addr)
}
}
-#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE)
-
/*
* When regs->ip points to a 0xD6 byte in the FineIBT preamble,
* return true and fill out target and type.
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5a6a772e0a6c..4dbff8ef9b1c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -397,7 +397,7 @@ static inline void handle_invalid_op(struct pt_regs *regs)
ILL_ILLOPN, error_get_trap_addr(regs));
}
-static noinstr bool handle_bug(struct pt_regs *regs)
+noinstr bool handle_bug(struct pt_regs *regs)
{
unsigned long addr = regs->ip;
bool handled = false;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d916bd766c94..801bf9e520db 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,7 +20,6 @@ if VIRTUALIZATION
config KVM_X86
def_tristate KVM if (KVM_INTEL != n || KVM_AMD != n)
select KVM_COMMON
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_ELIDE_TLB_FLUSH_IF_YOUNG
select KVM_MMU_LOCKLESS_AGING
select HAVE_KVM_IRQCHIP
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3fb64905d190..a03530795707 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4805,7 +4805,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_USER_NMI:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_IOEVENTFD:
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 2fdc1f1f5adb..6b9ff1c6cafa 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -411,14 +411,11 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
return;
if (trapnr == X86_TRAP_UD) {
- if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
- /* Skip the ud2. */
- regs->ip += LEN_UD2;
+ if (handle_bug(regs))
return;
- }
/*
- * If this was a BUG and report_bug returns or if this
+ * If this was a BUG and handle_bug returns or if this
* was just a normal #UD, we want to continue onward and
* crash.
*/
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8f10080e6fe3..e9b78040d703 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -438,17 +438,8 @@ static void emit_kcfi(u8 **pprog, u32 hash)
EMIT1_off32(0xb8, hash); /* movl $hash, %eax */
#ifdef CONFIG_CALL_PADDING
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
+ for (int i = 0; i < CONFIG_FUNCTION_PADDING_CFI; i++)
+ EMIT1(0x90);
#endif
EMIT_ENDBR();
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 4503c7c77a3e..25845bd5e507 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -23,9 +23,9 @@
#include "amdxdna_pci_drv.h"
#include "amdxdna_pm.h"
-static bool force_cmdlist;
+static bool force_cmdlist = true;
module_param(force_cmdlist, bool, 0600);
-MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
+MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)");
#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
@@ -53,6 +53,7 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwct
{
drm_sched_stop(&hwctx->priv->sched, bad_job);
aie2_destroy_context(xdna->dev_handle, hwctx);
+ drm_sched_start(&hwctx->priv->sched, 0);
}
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
@@ -80,7 +81,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
}
out:
- drm_sched_start(&hwctx->priv->sched, 0);
XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
return ret;
}
@@ -297,19 +297,23 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
struct dma_fence *fence;
int ret;
- if (!hwctx->priv->mbox_chann)
+ ret = amdxdna_pm_resume_get(hwctx->client->xdna);
+ if (ret)
+ return NULL;
+
+ if (!hwctx->priv->mbox_chann) {
+ amdxdna_pm_suspend_put(hwctx->client->xdna);
return NULL;
+ }
- if (!mmget_not_zero(job->mm))
+ if (!mmget_not_zero(job->mm)) {
+ amdxdna_pm_suspend_put(hwctx->client->xdna);
return ERR_PTR(-ESRCH);
+ }
kref_get(&job->refcnt);
fence = dma_fence_get(job->fence);
- ret = amdxdna_pm_resume_get(hwctx->client->xdna);
- if (ret)
- goto out;
-
if (job->drv_cmd) {
switch (job->drv_cmd->opcode) {
case SYNC_DEBUG_BO:
@@ -497,7 +501,7 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
ret = aie2_destroy_context(xdna->dev_handle, hwctx);
- if (ret)
+ if (ret && ret != -ENODEV)
XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret);
} else {
ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
@@ -629,7 +633,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
goto free_entity;
}
- ret = amdxdna_pm_resume_get(xdna);
+ ret = amdxdna_pm_resume_get_locked(xdna);
if (ret)
goto free_col_list;
@@ -760,7 +764,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
if (!hwctx->cus)
return -ENOMEM;
- ret = amdxdna_pm_resume_get(xdna);
+ ret = amdxdna_pm_resume_get_locked(xdna);
if (ret)
goto free_cus;
@@ -1070,6 +1074,8 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
true, MAX_SCHEDULE_TIMEOUT);
- if (!ret || ret == -ERESTARTSYS)
+ if (!ret)
XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
+ else if (ret == -ERESTARTSYS)
+ XDNA_DBG(xdna, "Wait for bo interrupted by signal");
}
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 7d7dcfeaf794..277a27bce850 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -216,8 +216,10 @@ static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id)
req.context_id = id;
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
- if (ret)
+ if (ret && ret != -ENODEV)
XDNA_WARN(xdna, "Destroy context failed, ret %d", ret);
+ else if (ret == -ENODEV)
+ XDNA_DBG(xdna, "Destroy context: device already stopped");
return ret;
}
@@ -318,6 +320,9 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc
struct amdxdna_dev *xdna = ndev->xdna;
int ret;
+ if (!hwctx->priv->mbox_chann)
+ return 0;
+
xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
@@ -694,11 +699,11 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz
u32 cmd_len;
void *cmd;
- memset(npu_slot, 0, sizeof(*npu_slot));
cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
if (*size < sizeof(*npu_slot) + cmd_len)
return -EINVAL;
+ memset(npu_slot, 0, sizeof(*npu_slot));
npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
if (npu_slot->cu_idx == INVALID_CU_IDX)
return -EINVAL;
@@ -719,7 +724,6 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
u32 cmd_len;
u32 arg_sz;
- memset(npu_slot, 0, sizeof(*npu_slot));
sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
arg_sz = cmd_len - sizeof(*sn);
if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
@@ -728,6 +732,7 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
if (*size < sizeof(*npu_slot) + arg_sz)
return -EINVAL;
+ memset(npu_slot, 0, sizeof(*npu_slot));
npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
if (npu_slot->cu_idx == INVALID_CU_IDX)
return -EINVAL;
@@ -751,7 +756,6 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t
u32 cmd_len;
u32 arg_sz;
- memset(npu_slot, 0, sizeof(*npu_slot));
pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
arg_sz = cmd_len - sizeof(*pd);
if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
@@ -760,6 +764,7 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t
if (*size < sizeof(*npu_slot) + arg_sz)
return -EINVAL;
+ memset(npu_slot, 0, sizeof(*npu_slot));
npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
if (npu_slot->cu_idx == INVALID_CU_IDX)
return -EINVAL;
@@ -787,7 +792,6 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
u32 cmd_len;
u32 arg_sz;
- memset(npu_slot, 0, sizeof(*npu_slot));
pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
arg_sz = cmd_len - sizeof(*pd);
if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
@@ -796,6 +800,7 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
if (*size < sizeof(*npu_slot) + arg_sz)
return -EINVAL;
+ memset(npu_slot, 0, sizeof(*npu_slot));
npu_slot->type = EXEC_NPU_TYPE_ELF;
npu_slot->inst_buf_addr = pd->inst_buf;
npu_slot->save_buf_addr = pd->save_buf;
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 2a51b2658bfc..85079b6fc5d9 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -32,6 +32,11 @@ static int aie2_max_col = XRS_MAX_COL;
module_param(aie2_max_col, uint, 0600);
MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
+static char *npu_fw[] = {
+ "npu_7.sbin",
+ "npu.sbin"
+};
+
/*
* The management mailbox channel is allocated by firmware.
* The related register and ring buffer information is on SRAM BAR.
@@ -323,6 +328,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
return;
}
+ aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
aie2_mgmt_fw_fini(ndev);
xdna_mailbox_stop_channel(ndev->mgmt_chann);
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
@@ -406,15 +412,15 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
goto stop_psp;
}
- ret = aie2_pm_init(ndev);
+ ret = aie2_mgmt_fw_init(ndev);
if (ret) {
- XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
+ XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
goto destroy_mgmt_chann;
}
- ret = aie2_mgmt_fw_init(ndev);
+ ret = aie2_pm_init(ndev);
if (ret) {
- XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
+ XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
goto destroy_mgmt_chann;
}
@@ -451,7 +457,6 @@ static int aie2_hw_suspend(struct amdxdna_dev *xdna)
{
struct amdxdna_client *client;
- guard(mutex)(&xdna->dev_lock);
list_for_each_entry(client, &xdna->client_list, node)
aie2_hwctx_suspend(client);
@@ -489,6 +494,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
struct psp_config psp_conf;
const struct firmware *fw;
unsigned long bars = 0;
+ char *fw_full_path;
int i, nvec, ret;
if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
@@ -503,7 +509,19 @@ static int aie2_init(struct amdxdna_dev *xdna)
ndev->priv = xdna->dev_info->dev_priv;
ndev->xdna = xdna;
- ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
+ for (i = 0; i < ARRAY_SIZE(npu_fw); i++) {
+ fw_full_path = kasprintf(GFP_KERNEL, "%s%s", ndev->priv->fw_path, npu_fw[i]);
+ if (!fw_full_path)
+ return -ENOMEM;
+
+ ret = firmware_request_nowarn(&fw, fw_full_path, &pdev->dev);
+ kfree(fw_full_path);
+ if (!ret) {
+ XDNA_INFO(xdna, "Load firmware %s%s", ndev->priv->fw_path, npu_fw[i]);
+ break;
+ }
+ }
+
if (ret) {
XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
ndev->priv->fw_path, ret);
@@ -951,7 +969,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
- ret = amdxdna_pm_resume_get(xdna);
+ ret = amdxdna_pm_resume_get_locked(xdna);
if (ret)
goto dev_exit;
@@ -1044,7 +1062,7 @@ static int aie2_get_array(struct amdxdna_client *client,
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
- ret = amdxdna_pm_resume_get(xdna);
+ ret = amdxdna_pm_resume_get_locked(xdna);
if (ret)
goto dev_exit;
@@ -1134,7 +1152,7 @@ static int aie2_set_state(struct amdxdna_client *client,
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
- ret = amdxdna_pm_resume_get(xdna);
+ ret = amdxdna_pm_resume_get_locked(xdna);
if (ret)
goto dev_exit;
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
index 579b8be13b18..29bd4403a94d 100644
--- a/drivers/accel/amdxdna/aie2_pm.c
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -31,7 +31,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
int ret;
- ret = amdxdna_pm_resume_get(ndev->xdna);
+ ret = amdxdna_pm_resume_get_locked(ndev->xdna);
if (ret)
return ret;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 59fa3800b9d3..263d36072540 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -104,7 +104,10 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
if (size) {
count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
- if (unlikely(count <= num_masks)) {
+ if (unlikely(count <= num_masks ||
+ count * sizeof(u32) +
+ offsetof(struct amdxdna_cmd, data[0]) >
+ abo->mem.size)) {
*size = 0;
return NULL;
}
@@ -266,9 +269,9 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
struct amdxdna_drm_config_hwctx *args = data;
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct amdxdna_hwctx *hwctx;
- int ret, idx;
u32 buf_size;
void *buf;
+ int ret;
u64 val;
if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
@@ -310,20 +313,17 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
return -EINVAL;
}
- mutex_lock(&xdna->dev_lock);
- idx = srcu_read_lock(&client->hwctx_srcu);
+ guard(mutex)(&xdna->dev_lock);
hwctx = xa_load(&client->hwctx_xa, args->handle);
if (!hwctx) {
XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
ret = -EINVAL;
- goto unlock_srcu;
+ goto free_buf;
}
ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);
-unlock_srcu:
- srcu_read_unlock(&client->hwctx_srcu, idx);
- mutex_unlock(&xdna->dev_lock);
+free_buf:
kfree(buf);
return ret;
}
@@ -334,7 +334,7 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
struct amdxdna_hwctx *hwctx;
struct amdxdna_gem_obj *abo;
struct drm_gem_object *gobj;
- int ret, idx;
+ int ret;
if (!xdna->dev_info->ops->hwctx_sync_debug_bo)
return -EOPNOTSUPP;
@@ -345,17 +345,15 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
abo = to_xdna_obj(gobj);
guard(mutex)(&xdna->dev_lock);
- idx = srcu_read_lock(&client->hwctx_srcu);
hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx);
if (!hwctx) {
ret = -EINVAL;
- goto unlock_srcu;
+ goto put_obj;
}
ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl);
-unlock_srcu:
- srcu_read_unlock(&client->hwctx_srcu, idx);
+put_obj:
drm_gem_object_put(gobj);
return ret;
}
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 8c290ddd3251..d60db49ead71 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -21,8 +21,6 @@
#include "amdxdna_pci_drv.h"
#include "amdxdna_ubuf.h"
-#define XDNA_MAX_CMD_BO_SIZE SZ_32K
-
MODULE_IMPORT_NS("DMA_BUF");
static int
@@ -745,12 +743,6 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
{
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct amdxdna_gem_obj *abo;
- int ret;
-
- if (args->size > XDNA_MAX_CMD_BO_SIZE) {
- XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size);
- return ERR_PTR(-EINVAL);
- }
if (args->size < sizeof(struct amdxdna_cmd)) {
XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size);
@@ -764,17 +756,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
abo->type = AMDXDNA_BO_CMD;
abo->client = filp->driver_priv;
- ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva);
- if (ret) {
- XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
- goto release_obj;
- }
-
return abo;
-
-release_obj:
- drm_gem_object_put(to_gobj(abo));
- return ERR_PTR(ret);
}
int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -871,6 +853,7 @@ struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_gem_obj *abo;
struct drm_gem_object *gobj;
+ int ret;
gobj = drm_gem_object_lookup(client->filp, bo_hdl);
if (!gobj) {
@@ -879,9 +862,26 @@ struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
}
abo = to_xdna_obj(gobj);
- if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type)
+ if (bo_type != AMDXDNA_BO_INVALID && abo->type != bo_type)
+ goto put_obj;
+
+ if (bo_type != AMDXDNA_BO_CMD || abo->mem.kva)
return abo;
+ if (abo->mem.size > SZ_32K) {
+ XDNA_ERR(xdna, "Cmd bo is too big %ld", abo->mem.size);
+ goto put_obj;
+ }
+
+ ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva);
+ if (ret) {
+ XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
+ goto put_obj;
+ }
+
+ return abo;
+
+put_obj:
drm_gem_object_put(gobj);
return NULL;
}
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 4ada45d06fcf..a4384593bdcc 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -23,6 +23,9 @@ MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin");
MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
+MODULE_FIRMWARE("amdnpu/1502_00/npu_7.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_10/npu_7.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
/*
* 0.0: Initial version
diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c
index d024d480521c..b1fafddd7ad5 100644
--- a/drivers/accel/amdxdna/amdxdna_pm.c
+++ b/drivers/accel/amdxdna/amdxdna_pm.c
@@ -16,6 +16,7 @@ int amdxdna_pm_suspend(struct device *dev)
struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
int ret = -EOPNOTSUPP;
+ guard(mutex)(&xdna->dev_lock);
if (xdna->dev_info->ops->suspend)
ret = xdna->dev_info->ops->suspend(xdna);
@@ -28,6 +29,7 @@ int amdxdna_pm_resume(struct device *dev)
struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
int ret = -EOPNOTSUPP;
+ guard(mutex)(&xdna->dev_lock);
if (xdna->dev_info->ops->resume)
ret = xdna->dev_info->ops->resume(xdna);
diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h
index 77b2d6e45570..3d26b973e0e3 100644
--- a/drivers/accel/amdxdna/amdxdna_pm.h
+++ b/drivers/accel/amdxdna/amdxdna_pm.h
@@ -15,4 +15,15 @@ void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
void amdxdna_pm_init(struct amdxdna_dev *xdna);
void amdxdna_pm_fini(struct amdxdna_dev *xdna);
+static inline int amdxdna_pm_resume_get_locked(struct amdxdna_dev *xdna)
+{
+ int ret;
+
+ mutex_unlock(&xdna->dev_lock);
+ ret = amdxdna_pm_resume_get(xdna);
+ mutex_lock(&xdna->dev_lock);
+
+ return ret;
+}
+
#endif /* _AMDXDNA_PM_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c
index b509f10b155c..fb71d6e3f44d 100644
--- a/drivers/accel/amdxdna/amdxdna_ubuf.c
+++ b/drivers/accel/amdxdna/amdxdna_ubuf.c
@@ -7,6 +7,7 @@
#include <drm/drm_device.h>
#include <drm/drm_print.h>
#include <linux/dma-buf.h>
+#include <linux/overflow.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
@@ -176,7 +177,10 @@ struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev,
goto free_ent;
}
- exp_info.size += va_ent[i].len;
+ if (check_add_overflow(exp_info.size, va_ent[i].len, &exp_info.size)) {
+ ret = -EINVAL;
+ goto free_ent;
+ }
}
ubuf->nr_pages = exp_info.size >> PAGE_SHIFT;
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index 6f36a27b5a02..6e3d3ca69c04 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -72,7 +72,7 @@ static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
};
static const struct amdxdna_dev_priv npu1_dev_priv = {
- .fw_path = "amdnpu/1502_00/npu.sbin",
+ .fw_path = "amdnpu/1502_00/",
.rt_config = npu1_default_rt_cfg,
.dpm_clk_tbl = npu1_dpm_clk_table,
.fw_feature_tbl = npu1_fw_feature_table,
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index a8d6f76dde5f..ce25eef5fc34 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -98,7 +98,7 @@ const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
};
static const struct amdxdna_dev_priv npu4_dev_priv = {
- .fw_path = "amdnpu/17f0_10/npu.sbin",
+ .fw_path = "amdnpu/17f0_10/",
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
.fw_feature_tbl = npu4_fw_feature_table,
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index c0a35cfd886c..c0ac5daf32ee 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -63,7 +63,7 @@
#define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
static const struct amdxdna_dev_priv npu5_dev_priv = {
- .fw_path = "amdnpu/17f0_11/npu.sbin",
+ .fw_path = "amdnpu/17f0_11/",
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
.fw_feature_tbl = npu4_fw_feature_table,
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index 1fb07df99186..ce591ed0d483 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -63,7 +63,7 @@
#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
static const struct amdxdna_dev_priv npu6_dev_priv = {
- .fw_path = "amdnpu/17f0_10/npu.sbin",
+ .fw_path = "amdnpu/17f0_10/",
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
.fw_feature_tbl = npu4_fw_feature_table,
diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c
index 4fbd4bbf2909..668c71d5ff45 100644
--- a/drivers/accel/ethosu/ethosu_gem.c
+++ b/drivers/accel/ethosu/ethosu_gem.c
@@ -154,7 +154,7 @@ static void cmd_state_init(struct cmd_state *st)
static u64 cmd_to_addr(u32 *cmd)
{
- return ((u64)((cmd[0] & 0xff0000) << 16)) | cmd[1];
+ return (((u64)cmd[0] & 0xff0000) << 16) | cmd[1];
}
static u64 dma_length(struct ethosu_validated_cmdstream_info *info,
diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c
index f2c943b934be..9470f1830ff5 100644
--- a/drivers/acpi/osi.c
+++ b/drivers/acpi/osi.c
@@ -390,6 +390,19 @@ static const struct dmi_system_id acpi_osi_dmi_table[] __initconst = {
},
/*
+ * The screen backlight turns off during udev device creation
+ * when returning true for _OSI("Windows 2009")
+ */
+ {
+ .callback = dmi_disable_osi_win7,
+ .ident = "Acer Aspire One D255",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "AOD255"),
+ },
+ },
+
+ /*
* The wireless hotkey does not work on those machines when
* returning true for _OSI("Windows 2012")
*/
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 66ec81e306d4..132a9df98471 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -386,6 +386,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "80E1"),
},
},
+ {
+ .callback = init_nvs_save_s3,
+ .ident = "Lenovo G70-35",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "80Q5"),
+ },
+ },
/*
* ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using
* the Low Power S0 Idle firmware interface (see
diff --git a/drivers/base/property.c b/drivers/base/property.c
index 6a63860579dd..8d9a34be57fb 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -797,7 +797,18 @@ struct fwnode_handle *
fwnode_get_next_child_node(const struct fwnode_handle *fwnode,
struct fwnode_handle *child)
{
- return fwnode_call_ptr_op(fwnode, get_next_child_node, child);
+ struct fwnode_handle *next;
+
+ if (IS_ERR_OR_NULL(fwnode))
+ return NULL;
+
+ /* Try to find a child in primary fwnode */
+ next = fwnode_call_ptr_op(fwnode, get_next_child_node, child);
+ if (next)
+ return next;
+
+ /* When no more children in primary, continue with secondary */
+ return fwnode_call_ptr_op(fwnode->secondary, get_next_child_node, child);
}
EXPORT_SYMBOL_GPL(fwnode_get_next_child_node);
@@ -841,19 +852,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node);
struct fwnode_handle *device_get_next_child_node(const struct device *dev,
struct fwnode_handle *child)
{
- const struct fwnode_handle *fwnode = dev_fwnode(dev);
- struct fwnode_handle *next;
-
- if (IS_ERR_OR_NULL(fwnode))
- return NULL;
-
- /* Try to find a child in primary fwnode */
- next = fwnode_get_next_child_node(fwnode, child);
- if (next)
- return next;
-
- /* When no more children in primary, continue with secondary */
- return fwnode_get_next_child_node(fwnode->secondary, child);
+ return fwnode_get_next_child_node(dev_fwnode(dev), child);
}
EXPORT_SYMBOL_GPL(device_get_next_child_node);
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 742b2908ff68..b3dbf6c76e98 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -483,38 +483,20 @@ void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)
int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
{
- struct lru_cache *al = device->act_log;
/* for bios crossing activity log extent boundaries,
* we may need to activate two extents in one go */
unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
- unsigned nr_al_extents;
- unsigned available_update_slots;
unsigned enr;
- D_ASSERT(device, first <= last);
-
- nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */
- available_update_slots = min(al->nr_elements - al->used,
- al->max_pending_changes - al->pending_changes);
-
- /* We want all necessary updates for a given request within the same transaction
- * We could first check how many updates are *actually* needed,
- * and use that instead of the worst-case nr_al_extents */
- if (available_update_slots < nr_al_extents) {
- /* Too many activity log extents are currently "hot".
- *
- * If we have accumulated pending changes already,
- * we made progress.
- *
- * If we cannot get even a single pending change through,
- * stop the fast path until we made some progress,
- * or requests to "cold" extents could be starved. */
- if (!al->pending_changes)
- __set_bit(__LC_STARVING, &device->act_log->flags);
- return -ENOBUFS;
+ if (i->partially_in_al_next_enr) {
+ D_ASSERT(device, first < i->partially_in_al_next_enr);
+ D_ASSERT(device, last >= i->partially_in_al_next_enr);
+ first = i->partially_in_al_next_enr;
}
+ D_ASSERT(device, first <= last);
+
/* Is resync active in this area? */
for (enr = first; enr <= last; enr++) {
struct lc_element *tmp;
@@ -529,14 +511,21 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
}
}
- /* Checkout the refcounts.
- * Given that we checked for available elements and update slots above,
- * this has to be successful. */
+ /* Try to checkout the refcounts. */
for (enr = first; enr <= last; enr++) {
struct lc_element *al_ext;
al_ext = lc_get_cumulative(device->act_log, enr);
- if (!al_ext)
- drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
+
+ if (!al_ext) {
+ /* Did not work. We may have exhausted the possible
+ * changes per transaction. Or raced with someone
+ * "locking" it against changes.
+ * Remember where to continue from.
+ */
+ if (enr > first)
+ i->partially_in_al_next_enr = enr;
+ return -ENOBUFS;
+ }
}
return 0;
}
@@ -556,7 +545,11 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
for (enr = first; enr <= last; enr++) {
extent = lc_find(device->act_log, enr);
- if (!extent) {
+ /* Yes, this masks a bug elsewhere. However, during normal
+ * operation this is harmless, so no need to crash the kernel
+ * by the BUG_ON(refcount == 0) in lc_put().
+ */
+ if (!extent || extent->refcnt == 0) {
drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
continue;
}
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index 366489b72fe9..5d3213b81eed 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -8,12 +8,15 @@
struct drbd_interval {
struct rb_node rb;
sector_t sector; /* start sector of the interval */
- unsigned int size; /* size in bytes */
sector_t end; /* highest interval end in subtree */
+ unsigned int size; /* size in bytes */
unsigned int local:1 /* local or remote request? */;
unsigned int waiting:1; /* someone is waiting for completion */
unsigned int completed:1; /* this has been completed already;
* ignore for conflict detection */
+
+ /* to resume a partially successful drbd_al_begin_io_nonblock(); */
+ unsigned int partially_in_al_next_enr;
};
static inline void drbd_clear_interval(struct drbd_interval *i)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index b8f0eddf7e87..200d464e984b 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -32,6 +32,7 @@
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
@@ -732,9 +733,9 @@ int drbd_send_sync_param(struct drbd_peer_device *peer_device)
}
if (apv >= 88)
- strcpy(p->verify_alg, nc->verify_alg);
+ strscpy(p->verify_alg, nc->verify_alg);
if (apv >= 89)
- strcpy(p->csums_alg, nc->csums_alg);
+ strscpy(p->csums_alg, nc->csums_alg);
rcu_read_unlock();
return drbd_send_command(peer_device, sock, cmd, size, NULL, 0);
@@ -745,6 +746,7 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm
struct drbd_socket *sock;
struct p_protocol *p;
struct net_conf *nc;
+ size_t integrity_alg_len;
int size, cf;
sock = &connection->data;
@@ -762,8 +764,10 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm
}
size = sizeof(*p);
- if (connection->agreed_pro_version >= 87)
- size += strlen(nc->integrity_alg) + 1;
+ if (connection->agreed_pro_version >= 87) {
+ integrity_alg_len = strlen(nc->integrity_alg) + 1;
+ size += integrity_alg_len;
+ }
p->protocol = cpu_to_be32(nc->wire_protocol);
p->after_sb_0p = cpu_to_be32(nc->after_sb_0p);
@@ -778,7 +782,7 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm
p->conn_flags = cpu_to_be32(cf);
if (connection->agreed_pro_version >= 87)
- strcpy(p->integrity_alg, nc->integrity_alg);
+ strscpy(p->integrity_alg, nc->integrity_alg, integrity_alg_len);
rcu_read_unlock();
return __conn_send_command(connection, sock, cmd, size, NULL, 0);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 3cfef456a1f3..58b95bf4bdca 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3801,14 +3801,14 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
*new_net_conf = *old_net_conf;
if (verify_tfm) {
- strcpy(new_net_conf->verify_alg, p->verify_alg);
+ strscpy(new_net_conf->verify_alg, p->verify_alg);
new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
crypto_free_shash(peer_device->connection->verify_tfm);
peer_device->connection->verify_tfm = verify_tfm;
drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
}
if (csums_tfm) {
- strcpy(new_net_conf->csums_alg, p->csums_alg);
+ strscpy(new_net_conf->csums_alg, p->csums_alg);
new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
crypto_free_shash(peer_device->connection->csums_tfm);
peer_device->connection->csums_tfm = csums_tfm;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d15826f6ee81..70f75ef07945 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -621,7 +621,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
case READ_COMPLETED_WITH_ERROR:
- drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size);
+ drbd_set_out_of_sync(first_peer_device(device),
+ req->i.sector, req->i.size);
drbd_report_io_error(device, req);
__drbd_chk_io_error(device, DRBD_READ_ERROR);
fallthrough;
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 65a40266437c..51c043342127 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -542,6 +542,21 @@ out:
zloop_put_cmd(cmd);
}
+/*
+ * Sync the entire FS containing the zone files instead of walking all files.
+ */
+static int zloop_flush(struct zloop_device *zlo)
+{
+ struct super_block *sb = file_inode(zlo->data_dir)->i_sb;
+ int ret;
+
+ down_read(&sb->s_umount);
+ ret = sync_filesystem(sb);
+ up_read(&sb->s_umount);
+
+ return ret;
+}
+
static void zloop_handle_cmd(struct zloop_cmd *cmd)
{
struct request *rq = blk_mq_rq_from_pdu(cmd);
@@ -562,11 +577,7 @@ static void zloop_handle_cmd(struct zloop_cmd *cmd)
zloop_rw(cmd);
return;
case REQ_OP_FLUSH:
- /*
- * Sync the entire FS containing the zone files instead of
- * walking all files
- */
- cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb);
+ cmd->ret = zloop_flush(zlo);
break;
case REQ_OP_ZONE_RESET:
cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
@@ -981,7 +992,8 @@ static int zloop_ctl_add(struct zloop_options *opts)
struct queue_limits lim = {
.max_hw_sectors = SZ_1M >> SECTOR_SHIFT,
.chunk_sectors = opts->zone_size,
- .features = BLK_FEAT_ZONED,
+ .features = BLK_FEAT_ZONED | BLK_FEAT_WRITE_CACHE,
+
};
unsigned int nr_zones, i, j;
struct zloop_device *zlo;
@@ -1162,7 +1174,12 @@ static int zloop_ctl_remove(struct zloop_options *opts)
int ret;
if (!(opts->mask & ZLOOP_OPT_ID)) {
- pr_err("No ID specified\n");
+ pr_err("No ID specified for remove\n");
+ return -EINVAL;
+ }
+
+ if (opts->mask & ~ZLOOP_OPT_ID) {
+ pr_err("Invalid option specified for remove\n");
return -EINVAL;
}
diff --git a/drivers/char/ipmi/ipmi_ipmb.c b/drivers/char/ipmi/ipmi_ipmb.c
index 3a51e58b2487..28818952a7a4 100644
--- a/drivers/char/ipmi/ipmi_ipmb.c
+++ b/drivers/char/ipmi/ipmi_ipmb.c
@@ -202,11 +202,16 @@ static int ipmi_ipmb_slave_cb(struct i2c_client *client,
break;
case I2C_SLAVE_READ_REQUESTED:
+ *val = 0xff;
+ ipmi_ipmb_check_msg_done(iidev);
+ break;
+
case I2C_SLAVE_STOP:
ipmi_ipmb_check_msg_done(iidev);
break;
case I2C_SLAVE_READ_PROCESSED:
+ *val = 0xff;
break;
}
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index c530966c4e07..c41f51c82edd 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -602,6 +602,22 @@ static int __ipmi_bmc_register(struct ipmi_smi *intf,
static int __scan_channels(struct ipmi_smi *intf,
struct ipmi_device_id *id, bool rescan);
+static void ipmi_lock_xmit_msgs(struct ipmi_smi *intf, int run_to_completion,
+ unsigned long *flags)
+{
+ if (run_to_completion)
+ return;
+ spin_lock_irqsave(&intf->xmit_msgs_lock, *flags);
+}
+
+static void ipmi_unlock_xmit_msgs(struct ipmi_smi *intf, int run_to_completion,
+ unsigned long *flags)
+{
+ if (run_to_completion)
+ return;
+ spin_unlock_irqrestore(&intf->xmit_msgs_lock, *flags);
+}
+
static void free_ipmi_user(struct kref *ref)
{
struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount);
@@ -1869,21 +1885,32 @@ static struct ipmi_smi_msg *smi_add_send_msg(struct ipmi_smi *intf,
return smi_msg;
}
-static void smi_send(struct ipmi_smi *intf,
+static int smi_send(struct ipmi_smi *intf,
const struct ipmi_smi_handlers *handlers,
struct ipmi_smi_msg *smi_msg, int priority)
{
int run_to_completion = READ_ONCE(intf->run_to_completion);
unsigned long flags = 0;
+ int rv = 0;
- if (!run_to_completion)
- spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+ ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
smi_msg = smi_add_send_msg(intf, smi_msg, priority);
- if (!run_to_completion)
- spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
+ ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
- if (smi_msg)
- handlers->sender(intf->send_info, smi_msg);
+ if (smi_msg) {
+ rv = handlers->sender(intf->send_info, smi_msg);
+ if (rv) {
+ ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
+ intf->curr_msg = NULL;
+ ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
+ /*
+ * Something may have been added to the transmit
+ * queue, so schedule a check for that.
+ */
+ queue_work(system_wq, &intf->smi_work);
+ }
+ }
+ return rv;
}
static bool is_maintenance_mode_cmd(struct kernel_ipmi_msg *msg)
@@ -2296,6 +2323,7 @@ static int i_ipmi_request(struct ipmi_user *user,
struct ipmi_recv_msg *recv_msg;
int run_to_completion = READ_ONCE(intf->run_to_completion);
int rv = 0;
+ bool in_seq_table = false;
if (supplied_recv) {
recv_msg = supplied_recv;
@@ -2349,33 +2377,50 @@ static int i_ipmi_request(struct ipmi_user *user,
rv = i_ipmi_req_ipmb(intf, addr, msgid, msg, smi_msg, recv_msg,
source_address, source_lun,
retries, retry_time_ms);
+ in_seq_table = true;
} else if (is_ipmb_direct_addr(addr)) {
rv = i_ipmi_req_ipmb_direct(intf, addr, msgid, msg, smi_msg,
recv_msg, source_lun);
} else if (is_lan_addr(addr)) {
rv = i_ipmi_req_lan(intf, addr, msgid, msg, smi_msg, recv_msg,
source_lun, retries, retry_time_ms);
+ in_seq_table = true;
} else {
- /* Unknown address type. */
+ /* Unknown address type. */
ipmi_inc_stat(intf, sent_invalid_commands);
rv = -EINVAL;
}
- if (rv) {
-out_err:
- if (!supplied_smi)
- ipmi_free_smi_msg(smi_msg);
- if (!supplied_recv)
- ipmi_free_recv_msg(recv_msg);
- } else {
+ if (!rv) {
dev_dbg(intf->si_dev, "Send: %*ph\n",
smi_msg->data_size, smi_msg->data);
- smi_send(intf, intf->handlers, smi_msg, priority);
+ rv = smi_send(intf, intf->handlers, smi_msg, priority);
+ if (rv != IPMI_CC_NO_ERROR)
+ /* smi_send() returns an IPMI err, return a Linux one. */
+ rv = -EIO;
+ if (rv && in_seq_table) {
+ /*
+ * If it's in the sequence table, it will be
+ * retried later, so ignore errors.
+ */
+ rv = 0;
+ /* But we need to fix the timeout. */
+ intf_start_seq_timer(intf, smi_msg->msgid);
+ ipmi_free_smi_msg(smi_msg);
+ smi_msg = NULL;
+ }
}
+out_err:
if (!run_to_completion)
mutex_unlock(&intf->users_mutex);
+ if (rv) {
+ if (!supplied_smi)
+ ipmi_free_smi_msg(smi_msg);
+ if (!supplied_recv)
+ ipmi_free_recv_msg(recv_msg);
+ }
return rv;
}
@@ -3949,12 +3994,12 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf,
dev_dbg(intf->si_dev, "Invalid command: %*ph\n",
msg->data_size, msg->data);
- smi_send(intf, intf->handlers, msg, 0);
- /*
- * We used the message, so return the value that
- * causes it to not be freed or queued.
- */
- rv = -1;
+ if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
+ /*
+ * We used the message, so return the value that
+ * causes it to not be freed or queued.
+ */
+ rv = -1;
} else if (!IS_ERR(recv_msg)) {
/* Extract the source address from the data. */
ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr;
@@ -4028,12 +4073,12 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf,
msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE;
msg->data_size = 5;
- smi_send(intf, intf->handlers, msg, 0);
- /*
- * We used the message, so return the value that
- * causes it to not be freed or queued.
- */
- rv = -1;
+ if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
+ /*
+ * We used the message, so return the value that
+ * causes it to not be freed or queued.
+ */
+ rv = -1;
} else if (!IS_ERR(recv_msg)) {
/* Extract the source address from the data. */
daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr;
@@ -4173,7 +4218,7 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf,
struct ipmi_smi_msg *msg)
{
struct cmd_rcvr *rcvr;
- int rv = 0;
+ int rv = 0; /* Free by default */
unsigned char netfn;
unsigned char cmd;
unsigned char chan;
@@ -4226,12 +4271,12 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf,
dev_dbg(intf->si_dev, "Invalid command: %*ph\n",
msg->data_size, msg->data);
- smi_send(intf, intf->handlers, msg, 0);
- /*
- * We used the message, so return the value that
- * causes it to not be freed or queued.
- */
- rv = -1;
+ if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
+ /*
+ * We used the message, so return the value that
+ * causes it to not be freed or queued.
+ */
+ rv = -1;
} else if (!IS_ERR(recv_msg)) {
/* Extract the source address from the data. */
lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr;
@@ -4824,8 +4869,7 @@ static void smi_work(struct work_struct *t)
* message delivery.
*/
restart:
- if (!run_to_completion)
- spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+ ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
if (intf->curr_msg == NULL && !intf->in_shutdown) {
struct list_head *entry = NULL;
@@ -4841,8 +4885,7 @@ restart:
intf->curr_msg = newmsg;
}
}
- if (!run_to_completion)
- spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
+ ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
if (newmsg) {
cc = intf->handlers->sender(intf->send_info, newmsg);
@@ -4850,8 +4893,11 @@ restart:
if (newmsg->recv_msg)
deliver_err_response(intf,
newmsg->recv_msg, cc);
- else
- ipmi_free_smi_msg(newmsg);
+ ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
+ intf->curr_msg = NULL;
+ ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
+ ipmi_free_smi_msg(newmsg);
+ newmsg = NULL;
goto restart;
}
}
@@ -4919,16 +4965,14 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf,
spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
flags);
- if (!run_to_completion)
- spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+ ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
/*
* We can get an asynchronous event or receive message in addition
* to commands we send.
*/
if (msg == intf->curr_msg)
intf->curr_msg = NULL;
- if (!run_to_completion)
- spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
+ ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
if (run_to_completion)
smi_work(&intf->smi_work);
@@ -5041,7 +5085,12 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
ipmi_inc_stat(intf,
retransmitted_ipmb_commands);
- smi_send(intf, intf->handlers, smi_msg, 0);
+ /* If this fails we'll retry later or timeout. */
+ if (smi_send(intf, intf->handlers, smi_msg, 0) != IPMI_CC_NO_ERROR) {
+ /* But fix the timeout. */
+ intf_start_seq_timer(intf, smi_msg->msgid);
+ ipmi_free_smi_msg(smi_msg);
+ }
} else
ipmi_free_smi_msg(smi_msg);
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 6bdf624c37ce..4a9e9de4d684 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -809,6 +809,12 @@ restart:
*/
return_hosed_msg(smi_info, IPMI_BUS_ERR);
}
+ if (smi_info->waiting_msg != NULL) {
+ /* Also handle if there was a message waiting. */
+ smi_info->curr_msg = smi_info->waiting_msg;
+ smi_info->waiting_msg = NULL;
+ return_hosed_msg(smi_info, IPMI_BUS_ERR);
+ }
smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_HOSED);
goto out;
}
@@ -918,9 +924,14 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg)
{
struct smi_info *smi_info = send_info;
unsigned long flags;
+ int rv = IPMI_CC_NO_ERROR;
debug_timestamp(smi_info, "Enqueue");
+ /*
+ * Check here for run to completion mode. A check under lock is
+ * later.
+ */
if (smi_info->si_state == SI_HOSED)
return IPMI_BUS_ERR;
@@ -934,18 +945,15 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg)
}
spin_lock_irqsave(&smi_info->si_lock, flags);
- /*
- * The following two lines don't need to be under the lock for
- * the lock's sake, but they do need SMP memory barriers to
- * avoid getting things out of order. We are already claiming
- * the lock, anyway, so just do it under the lock to avoid the
- * ordering problem.
- */
- BUG_ON(smi_info->waiting_msg);
- smi_info->waiting_msg = msg;
- check_start_timer_thread(smi_info);
+ if (smi_info->si_state == SI_HOSED) {
+ rv = IPMI_BUS_ERR;
+ } else {
+ BUG_ON(smi_info->waiting_msg);
+ smi_info->waiting_msg = msg;
+ check_start_timer_thread(smi_info);
+ }
spin_unlock_irqrestore(&smi_info->si_lock, flags);
- return IPMI_CC_NO_ERROR;
+ return rv;
}
static void set_run_to_completion(void *send_info, bool i_run_to_completion)
@@ -1113,7 +1121,9 @@ static void smi_timeout(struct timer_list *t)
* SI_USEC_PER_JIFFY);
smi_result = smi_event_handler(smi_info, time_diff);
- if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) {
+ if (smi_info->si_state == SI_HOSED) {
+ timeout = jiffies + SI_TIMEOUT_HOSED;
+ } else if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) {
/* Running with interrupts, only do long timeouts. */
timeout = jiffies + SI_TIMEOUT_JIFFIES;
smi_inc_stat(smi_info, long_timeouts);
@@ -2226,7 +2236,8 @@ static void wait_msg_processed(struct smi_info *smi_info)
unsigned long jiffies_now;
long time_diff;
- while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) {
+ while (smi_info->si_state != SI_HOSED &&
+ (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL))) {
jiffies_now = jiffies;
time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies)
* SI_USEC_PER_JIFFY);
diff --git a/drivers/char/ipmi/ipmi_si_ls2k.c b/drivers/char/ipmi/ipmi_si_ls2k.c
index 45442c257efd..4c1da80f256c 100644
--- a/drivers/char/ipmi/ipmi_si_ls2k.c
+++ b/drivers/char/ipmi/ipmi_si_ls2k.c
@@ -168,7 +168,7 @@ static void ipmi_ls2k_remove(struct platform_device *pdev)
ipmi_si_remove_by_dev(&pdev->dev);
}
-struct platform_driver ipmi_ls2k_platform_driver = {
+static struct platform_driver ipmi_ls2k_platform_driver = {
.driver = {
.name = "ls2k-ipmi-si",
},
diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c
index 3ae162625bb1..c781425a005e 100644
--- a/drivers/clk/imx/clk-imx8qxp.c
+++ b/drivers/clk/imx/clk-imx8qxp.c
@@ -346,7 +346,29 @@ static struct platform_driver imx8qxp_clk_driver = {
},
.probe = imx8qxp_clk_probe,
};
-module_platform_driver(imx8qxp_clk_driver);
+
+static int __init imx8qxp_clk_init(void)
+{
+ int ret;
+
+ ret = platform_driver_register(&imx8qxp_clk_driver);
+ if (ret)
+ return ret;
+
+ ret = imx_clk_scu_module_init();
+ if (ret)
+ platform_driver_unregister(&imx8qxp_clk_driver);
+
+ return ret;
+}
+module_init(imx8qxp_clk_init);
+
+static void __exit imx8qxp_clk_exit(void)
+{
+ imx_clk_scu_module_exit();
+ platform_driver_unregister(&imx8qxp_clk_driver);
+}
+module_exit(imx8qxp_clk_exit);
MODULE_AUTHOR("Aisheng Dong <aisheng.dong@nxp.com>");
MODULE_DESCRIPTION("NXP i.MX8QXP clock driver");
diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c
index 33e637ad3579..a85ec48a798b 100644
--- a/drivers/clk/imx/clk-scu.c
+++ b/drivers/clk/imx/clk-scu.c
@@ -191,6 +191,16 @@ static bool imx_scu_clk_is_valid(u32 rsrc_id)
return p != NULL;
}
+int __init imx_clk_scu_module_init(void)
+{
+ return platform_driver_register(&imx_clk_scu_driver);
+}
+
+void __exit imx_clk_scu_module_exit(void)
+{
+ return platform_driver_unregister(&imx_clk_scu_driver);
+}
+
int imx_clk_scu_init(struct device_node *np,
const struct imx_clk_scu_rsrc_table *data)
{
@@ -215,7 +225,7 @@ int imx_clk_scu_init(struct device_node *np,
rsrc_table = data;
}
- return platform_driver_register(&imx_clk_scu_driver);
+ return 0;
}
/*
diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h
index af7b697f51ca..ca82f2cce897 100644
--- a/drivers/clk/imx/clk-scu.h
+++ b/drivers/clk/imx/clk-scu.h
@@ -25,6 +25,8 @@ extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8dxl;
extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8qxp;
extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8qm;
+int __init imx_clk_scu_module_init(void);
+void __exit imx_clk_scu_module_exit(void);
int imx_clk_scu_init(struct device_node *np,
const struct imx_clk_scu_rsrc_table *data);
struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec,
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index a48af3540c74..11c58af41900 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1476,13 +1476,13 @@ static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy,
refresh_frequency_limits(policy);
}
-static bool intel_pstate_update_max_freq(struct cpudata *cpudata)
+static bool intel_pstate_update_max_freq(int cpu)
{
- struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
if (!policy)
return false;
- __intel_pstate_update_max_freq(policy, cpudata);
+ __intel_pstate_update_max_freq(policy, all_cpu_data[cpu]);
return true;
}
@@ -1501,7 +1501,7 @@ static void intel_pstate_update_limits_for_all(void)
int cpu;
for_each_possible_cpu(cpu)
- intel_pstate_update_max_freq(all_cpu_data[cpu]);
+ intel_pstate_update_max_freq(cpu);
mutex_lock(&hybrid_capacity_lock);
@@ -1647,8 +1647,8 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type)
{
struct cpudata *cpudata = all_cpu_data[cpu];
- unsigned int freq = cpudata->pstate.turbo_freq;
struct freq_qos_request *req;
+ unsigned int freq;
struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
if (!policy)
@@ -1661,6 +1661,8 @@ static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type)
if (hwp_active)
intel_pstate_get_hwp_cap(cpudata);
+ freq = cpudata->pstate.turbo_freq;
+
if (type == FREQ_QOS_MIN) {
freq = DIV_ROUND_UP(freq * global.min_perf_pct, 100);
} else {
@@ -1908,7 +1910,7 @@ static void intel_pstate_notify_work(struct work_struct *work)
struct cpudata *cpudata =
container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
- if (intel_pstate_update_max_freq(cpudata)) {
+ if (intel_pstate_update_max_freq(cpudata->cpu)) {
/*
* The driver will not be unregistered while this function is
* running, so update the capacity without acquiring the driver
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 007b8aff0238..5b0570df0fd9 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -152,6 +152,24 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c);
int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
struct access_coordinate *c);
+static inline struct device *port_to_host(struct cxl_port *port)
+{
+ struct cxl_port *parent = is_cxl_root(port) ? NULL :
+ to_cxl_port(port->dev.parent);
+
+ /*
+ * The host of CXL root port and the first level of ports is
+ * the platform firmware device, the host of all other ports
+ * is their parent port.
+ */
+ if (!parent)
+ return port->uport_dev;
+ else if (is_cxl_root(parent))
+ return parent->uport_dev;
+ else
+ return &parent->dev;
+}
+
static inline struct device *dport_to_host(struct cxl_dport *dport)
{
struct cxl_port *port = dport->port;
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index e3f0c39e6812..c222e98ae736 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -904,7 +904,7 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld)
if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
return;
- if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
+ if (cxld->flags & CXL_DECODER_F_LOCK)
return;
if (port->commit_end == id)
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index fa6dd0c94656..e7a6452bf544 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -311,6 +311,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
* cxl_payload_from_user_allowed() - Check contents of in_payload.
* @opcode: The mailbox command opcode.
* @payload_in: Pointer to the input payload passed in from user space.
+ * @in_size: Size of @payload_in in bytes.
*
* Return:
* * true - payload_in passes check for @opcode.
@@ -325,12 +326,15 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
*
* The specific checks are determined by the opcode.
*/
-static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
+static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in,
+ size_t in_size)
{
switch (opcode) {
case CXL_MBOX_OP_SET_PARTITION_INFO: {
struct cxl_mbox_set_partition_info *pi = payload_in;
+ if (in_size < sizeof(*pi))
+ return false;
if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG)
return false;
break;
@@ -338,6 +342,8 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
case CXL_MBOX_OP_CLEAR_LOG: {
const uuid_t *uuid = (uuid_t *)payload_in;
+ if (in_size < sizeof(uuid_t))
+ return false;
/*
* Restrict the ‘Clear log’ action to only apply to
* Vendor debug logs.
@@ -365,7 +371,8 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd,
if (IS_ERR(mbox_cmd->payload_in))
return PTR_ERR(mbox_cmd->payload_in);
- if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) {
+ if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in,
+ in_size)) {
dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n",
cxl_mem_opcode_to_name(opcode));
kvfree(mbox_cmd->payload_in);
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index f547d8ac34c7..273c22118d3d 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -1089,10 +1089,8 @@ static int cxlmd_add(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
DEFINE_FREE(put_cxlmd, struct cxl_memdev *,
if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
-static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd)
+static bool cxl_memdev_attach_failed(struct cxl_memdev *cxlmd)
{
- int rc;
-
/*
* If @attach is provided fail if the driver is not attached upon
* return. Note that failure here could be the result of a race to
@@ -1100,7 +1098,14 @@ static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd)
* succeeded and then cxl_mem unbound before the lock is acquired.
*/
guard(device)(&cxlmd->dev);
- if (cxlmd->attach && !cxlmd->dev.driver) {
+ return (cxlmd->attach && !cxlmd->dev.driver);
+}
+
+static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd)
+{
+ int rc;
+
+ if (cxl_memdev_attach_failed(cxlmd)) {
cxl_memdev_unregister(cxlmd);
return ERR_PTR(-ENXIO);
}
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index 3c6e76721522..68462e38a977 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -115,15 +115,17 @@ static void unregister_nvb(void *_cxl_nvb)
device_unregister(&cxl_nvb->dev);
}
-/**
- * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology
- * @host: platform firmware root device
- * @port: CXL port at the root of a CXL topology
- *
- * Return: bridge device that can host cxl_nvdimm objects
- */
-struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
- struct cxl_port *port)
+static bool cxl_nvdimm_bridge_failed_attach(struct cxl_nvdimm_bridge *cxl_nvb)
+{
+ struct device *dev = &cxl_nvb->dev;
+
+ guard(device)(dev);
+ /* If the device has no driver, then it failed to attach. */
+ return dev->driver == NULL;
+}
+
+struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host,
+ struct cxl_port *port)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct device *dev;
@@ -145,6 +147,11 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
if (rc)
goto err;
+ if (cxl_nvdimm_bridge_failed_attach(cxl_nvb)) {
+ unregister_nvb(cxl_nvb);
+ return ERR_PTR(-ENODEV);
+ }
+
rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb);
if (rc)
return ERR_PTR(rc);
@@ -155,7 +162,7 @@ err:
put_device(dev);
return ERR_PTR(rc);
}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, "CXL");
+EXPORT_SYMBOL_FOR_MODULES(__devm_cxl_add_nvdimm_bridge, "cxl_pmem");
static void cxl_nvdimm_release(struct device *dev)
{
@@ -255,6 +262,21 @@ int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port,
if (!cxl_nvb)
return -ENODEV;
+ /*
+ * Take the uport_dev lock to guard against race of nvdimm_bus object.
+ * cxl_acpi_probe() registers the nvdimm_bus and is done under the
+ * root port uport_dev lock.
+ *
+ * Take the cxl_nvb device lock to ensure that cxl_nvb driver is in a
+ * consistent state. And the driver registers nvdimm_bus.
+ */
+ guard(device)(cxl_nvb->port->uport_dev);
+ guard(device)(&cxl_nvb->dev);
+ if (!cxl_nvb->nvdimm_bus) {
+ rc = -ENODEV;
+ goto err_alloc;
+ }
+
cxl_nvd = cxl_nvdimm_alloc(cxl_nvb, cxlmd);
if (IS_ERR(cxl_nvd)) {
rc = PTR_ERR(cxl_nvd);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index b69c2529744c..0c5957d1d329 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -615,22 +615,8 @@ struct cxl_port *parent_port_of(struct cxl_port *port)
static void unregister_port(void *_port)
{
struct cxl_port *port = _port;
- struct cxl_port *parent = parent_port_of(port);
- struct device *lock_dev;
- /*
- * CXL root port's and the first level of ports are unregistered
- * under the platform firmware device lock, all other ports are
- * unregistered while holding their parent port lock.
- */
- if (!parent)
- lock_dev = port->uport_dev;
- else if (is_cxl_root(parent))
- lock_dev = parent->uport_dev;
- else
- lock_dev = &parent->dev;
-
- device_lock_assert(lock_dev);
+ device_lock_assert(port_to_host(port));
port->dead = true;
device_unregister(&port->dev);
}
@@ -1427,20 +1413,11 @@ static struct device *grandparent(struct device *dev)
return NULL;
}
-static struct device *endpoint_host(struct cxl_port *endpoint)
-{
- struct cxl_port *port = to_cxl_port(endpoint->dev.parent);
-
- if (is_cxl_root(port))
- return port->uport_dev;
- return &port->dev;
-}
-
static void delete_endpoint(void *data)
{
struct cxl_memdev *cxlmd = data;
struct cxl_port *endpoint = cxlmd->endpoint;
- struct device *host = endpoint_host(endpoint);
+ struct device *host = port_to_host(endpoint);
scoped_guard(device, host) {
if (host->driver && !endpoint->dead) {
@@ -1456,7 +1433,7 @@ static void delete_endpoint(void *data)
int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
{
- struct device *host = endpoint_host(endpoint);
+ struct device *host = port_to_host(endpoint);
struct device *dev = &cxlmd->dev;
get_device(host);
@@ -1790,7 +1767,16 @@ static struct cxl_dport *find_or_add_dport(struct cxl_port *port,
{
struct cxl_dport *dport;
- device_lock_assert(&port->dev);
+ /*
+ * The port is already visible in CXL hierarchy, but it may still
+ * be in the process of binding to the CXL port driver at this point.
+ *
+ * port creation and driver binding are protected by the port's host
+ * lock, so acquire the host lock here to ensure the port has completed
+ * driver binding before proceeding with dport addition.
+ */
+ guard(device)(port_to_host(port));
+ guard(device)(&port->dev);
dport = cxl_find_dport_by_dev(port, dport_dev);
if (!dport) {
dport = probe_dport(port, dport_dev);
@@ -1857,13 +1843,11 @@ retry:
* RP port enumerated by cxl_acpi without dport will
* have the dport added here.
*/
- scoped_guard(device, &port->dev) {
- dport = find_or_add_dport(port, dport_dev);
- if (IS_ERR(dport)) {
- if (PTR_ERR(dport) == -EAGAIN)
- goto retry;
- return PTR_ERR(dport);
- }
+ dport = find_or_add_dport(port, dport_dev);
+ if (IS_ERR(dport)) {
+ if (PTR_ERR(dport) == -EAGAIN)
+ goto retry;
+ return PTR_ERR(dport);
}
rc = cxl_add_ep(dport, &cxlmd->dev);
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index fec37af1dfbf..42874948b589 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1100,12 +1100,12 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
static void cxl_region_setup_flags(struct cxl_region *cxlr,
struct cxl_decoder *cxld)
{
- if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) {
+ if (cxld->flags & CXL_DECODER_F_LOCK) {
set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
}
- if (test_bit(CXL_DECODER_F_NORMALIZED_ADDRESSING, &cxld->flags))
+ if (cxld->flags & CXL_DECODER_F_NORMALIZED_ADDRESSING)
set_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags);
}
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 04c673e7cdb0..9b947286eb9b 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -574,11 +574,16 @@ struct cxl_nvdimm_bridge {
#define CXL_DEV_ID_LEN 19
+enum {
+ CXL_NVD_F_INVALIDATED = 0,
+};
+
struct cxl_nvdimm {
struct device dev;
struct cxl_memdev *cxlmd;
u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */
u64 dirty_shutdowns;
+ unsigned long flags;
};
struct cxl_pmem_region_mapping {
@@ -920,6 +925,8 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev);
struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_port *port);
+struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host,
+ struct cxl_port *port);
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port,
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 6a97e4e490b6..082ec0f1c3a0 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -13,6 +13,20 @@
static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
+/**
+ * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology
+ * @host: platform firmware root device
+ * @port: CXL port at the root of a CXL topology
+ *
+ * Return: bridge device that can host cxl_nvdimm objects
+ */
+struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
+ struct cxl_port *port)
+{
+ return __devm_cxl_add_nvdimm_bridge(host, port);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, "CXL");
+
static void clear_exclusive(void *mds)
{
clear_exclusive_cxl_commands(mds, exclusive_cmds);
@@ -129,6 +143,9 @@ static int cxl_nvdimm_probe(struct device *dev)
struct nvdimm *nvdimm;
int rc;
+ if (test_bit(CXL_NVD_F_INVALIDATED, &cxl_nvd->flags))
+ return -EBUSY;
+
set_exclusive_cxl_commands(mds, exclusive_cmds);
rc = devm_add_action_or_reset(dev, clear_exclusive, mds);
if (rc)
@@ -309,8 +326,10 @@ static int detach_nvdimm(struct device *dev, void *data)
scoped_guard(device, dev) {
if (dev->driver) {
cxl_nvd = to_cxl_nvdimm(dev);
- if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data)
+ if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) {
release = true;
+ set_bit(CXL_NVD_F_INVALIDATED, &cxl_nvd->flags);
+ }
}
}
if (release)
@@ -353,6 +372,7 @@ static struct cxl_driver cxl_nvdimm_bridge_driver = {
.probe = cxl_nvdimm_bridge_probe,
.id = CXL_DEVICE_NVDIMM_BRIDGE,
.drv = {
+ .probe_type = PROBE_FORCE_SYNCHRONOUS,
.suppress_bind_attrs = true,
},
};
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 1c868c1e4a49..8153d62c58f0 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -848,7 +848,7 @@ static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci,
{
struct device *dev = ohci->card.device;
unsigned int i;
- struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES];
+ struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES] = { NULL };
dma_addr_t dma_addrs[AR_BUFFERS];
void *vaddr;
struct descriptor *d;
diff --git a/drivers/gpio/gpiolib-shared.c b/drivers/gpio/gpiolib-shared.c
index d2614ace4de1..17a7128b6bd9 100644
--- a/drivers/gpio/gpiolib-shared.c
+++ b/drivers/gpio/gpiolib-shared.c
@@ -748,14 +748,14 @@ static bool gpio_shared_entry_is_really_shared(struct gpio_shared_entry *entry)
static void gpio_shared_free_exclusive(void)
{
struct gpio_shared_entry *entry, *epos;
+ struct gpio_shared_ref *ref, *rpos;
list_for_each_entry_safe(entry, epos, &gpio_shared_list, list) {
if (gpio_shared_entry_is_really_shared(entry))
continue;
- gpio_shared_drop_ref(list_first_entry(&entry->refs,
- struct gpio_shared_ref,
- list));
+ list_for_each_entry_safe(ref, rpos, &entry->refs, list)
+ gpio_shared_drop_ref(ref);
gpio_shared_drop_entry(entry);
}
}
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 86a171e96b0e..ada572aaebd6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3267,8 +3267,12 @@ static int gpiochip_get(struct gpio_chip *gc, unsigned int offset)
/* Make sure this is called after checking for gc->get(). */
ret = gc->get(gc, offset);
- if (ret > 1)
- ret = -EBADE;
+ if (ret > 1) {
+ gpiochip_warn(gc,
+ "invalid return value from gc->get(): %d, consider fixing the driver\n",
+ ret);
+ ret = !!ret;
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index afe5ca81beec..db7858fe0c3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -641,6 +641,7 @@ static void aca_error_fini(struct aca_error *aerr)
aca_bank_error_remove(aerr, bank_error);
out_unlock:
+ mutex_unlock(&aerr->lock);
mutex_destroy(&aerr->lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d9789e0b5201..3e19b51a2763 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -7059,6 +7059,15 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
dev_info(adev->dev, "PCI error: slot reset callback!!\n");
memset(&reset_context, 0, sizeof(reset_context));
+ INIT_LIST_HEAD(&device_list);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ } else {
+ list_add_tail(&adev->reset_list, &device_list);
+ }
if (adev->pcie_reset_ctx.swus)
link_dev = adev->pcie_reset_ctx.swus;
@@ -7099,19 +7108,13 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
- INIT_LIST_HEAD(&device_list);
- hive = amdgpu_get_xgmi_hive(adev);
if (hive) {
- mutex_lock(&hive->hive_lock);
reset_context.hive = hive;
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
tmp_adev->pcie_reset_ctx.in_link_reset = true;
- list_add_tail(&tmp_adev->reset_list, &device_list);
- }
} else {
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
- list_add_tail(&adev->reset_list, &device_list);
}
r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 6e8aad91bcd3..0d3c18f04ac3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -332,13 +332,13 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
if (!context || !context->initialized) {
dev_err(adev->dev, "TA is not initialized\n");
ret = -EINVAL;
- goto err_free_shared_buf;
+ goto free_shared_buf;
}
if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) {
dev_err(adev->dev, "Unsupported function to invoke TA\n");
ret = -EOPNOTSUPP;
- goto err_free_shared_buf;
+ goto free_shared_buf;
}
context->session_id = ta_id;
@@ -346,7 +346,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
mutex_lock(&psp->ras_context.mutex);
ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
if (ret)
- goto err_free_shared_buf;
+ goto unlock;
ret = psp_fn_ta_invoke(psp, cmd_id);
if (ret || context->resp_status) {
@@ -354,15 +354,17 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
ret, context->resp_status);
if (!ret) {
ret = -EINVAL;
- goto err_free_shared_buf;
+ goto unlock;
}
}
if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
ret = -EFAULT;
-err_free_shared_buf:
+unlock:
mutex_unlock(&psp->ras_context.mutex);
+
+free_shared_buf:
kfree(shared_buf);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 8013260e29dc..7e9cf1868cc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -35,6 +35,8 @@
static const struct dma_fence_ops amdgpu_userq_fence_ops;
static struct kmem_cache *amdgpu_userq_fence_slab;
+#define AMDGPU_USERQ_MAX_HANDLES (1U << 16)
+
int amdgpu_userq_fence_slab_init(void)
{
amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
@@ -478,6 +480,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
if (!amdgpu_userq_enabled(dev))
return -ENOTSUPP;
+ if (args->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES ||
+ args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
+ args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES)
+ return -EINVAL;
+
num_syncobj_handles = args->num_syncobj_handles;
syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
size_mul(sizeof(u32), num_syncobj_handles));
@@ -664,6 +671,11 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
if (!amdgpu_userq_enabled(dev))
return -ENOTSUPP;
+ if (wait_info->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES ||
+ wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
+ wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES)
+ return -EINVAL;
+
num_read_bo_handles = wait_info->num_bo_read_handles;
bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
size_mul(sizeof(u32), num_read_bo_handles));
@@ -833,7 +845,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
DMA_RESV_USAGE_READ, fence) {
- if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ if (num_fences >= wait_info->num_fences) {
r = -EINVAL;
goto free_fences;
}
@@ -850,7 +862,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
DMA_RESV_USAGE_WRITE, fence) {
- if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ if (num_fences >= wait_info->num_fences) {
r = -EINVAL;
goto free_fences;
}
@@ -874,8 +886,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
goto free_fences;
dma_fence_unwrap_for_each(f, &iter, fence) {
- if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ if (num_fences >= wait_info->num_fences) {
r = -EINVAL;
+ dma_fence_put(fence);
goto free_fences;
}
@@ -898,8 +911,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
if (r)
goto free_fences;
- if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ if (num_fences >= wait_info->num_fences) {
r = -EINVAL;
+ dma_fence_put(fence);
goto free_fences;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 09ebb13ca5e8..a926a330700e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -720,11 +720,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
- if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f)
- mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
- else
- dev_info_once(mes->adev->dev,
- "MES FW version must be >= 0x7f to enable LR compute workaround.\n");
if (amdgpu_mes_log_enable) {
mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index b1c864dc79a8..5bfa5d1d0b36 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -779,11 +779,6 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
- if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82)
- mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
- else
- dev_info_once(adev->dev,
- "MES FW version must be >= 0x82 to enable LR compute workaround.\n");
/*
* Keep oversubscribe timer for sdma . When we have unmapped doorbell
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index 0202df5db1e1..6109124f852e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -174,6 +174,10 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
fw_shared->sq.is_enabled = 1;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+ fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+ AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 246893d80f1f..baf820e6eae8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -170,11 +170,11 @@ struct dc_stream_state *dc_create_stream_for_sink(
if (sink == NULL)
goto fail;
- stream = kzalloc_obj(struct dc_stream_state);
+ stream = kzalloc_obj(struct dc_stream_state, GFP_ATOMIC);
if (stream == NULL)
goto fail;
- stream->update_scratch = kzalloc((int32_t) dc_update_scratch_space_size(), GFP_KERNEL);
+ stream->update_scratch = kzalloc((int32_t) dc_update_scratch_space_size(), GFP_ATOMIC);
if (stream->update_scratch == NULL)
goto fail;
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index 930aaa659c97..ec632f268644 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -1881,6 +1881,14 @@ static int samsung_dsim_register_te_irq(struct samsung_dsim *dsi, struct device
return 0;
}
+static void samsung_dsim_unregister_te_irq(struct samsung_dsim *dsi)
+{
+ if (dsi->te_gpio) {
+ free_irq(gpiod_to_irq(dsi->te_gpio), dsi);
+ gpiod_put(dsi->te_gpio);
+ }
+}
+
static int samsung_dsim_host_attach(struct mipi_dsi_host *host,
struct mipi_dsi_device *device)
{
@@ -1961,7 +1969,7 @@ of_find_panel_or_bridge:
if (!(device->mode_flags & MIPI_DSI_MODE_VIDEO)) {
ret = samsung_dsim_register_te_irq(dsi, &device->dev);
if (ret)
- return ret;
+ goto err_remove_bridge;
}
// The next bridge can be used by host_ops->attach
@@ -1982,15 +1990,12 @@ of_find_panel_or_bridge:
err_release_next_bridge:
drm_bridge_put(dsi->bridge.next_bridge);
dsi->bridge.next_bridge = NULL;
- return ret;
-}
-static void samsung_dsim_unregister_te_irq(struct samsung_dsim *dsi)
-{
- if (dsi->te_gpio) {
- free_irq(gpiod_to_irq(dsi->te_gpio), dsi);
- gpiod_put(dsi->te_gpio);
- }
+ if (!(device->mode_flags & MIPI_DSI_MODE_VIDEO))
+ samsung_dsim_unregister_te_irq(dsi);
+err_remove_bridge:
+ drm_bridge_remove(&dsi->bridge);
+ return ret;
}
static int samsung_dsim_host_detach(struct mipi_dsi_host *host,
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-dp.c b/drivers/gpu/drm/bridge/synopsys/dw-dp.c
index 4ab6922dd79c..fd23ca2834b0 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-dp.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-dp.c
@@ -2049,7 +2049,9 @@ struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder,
bridge->type = DRM_MODE_CONNECTOR_DisplayPort;
bridge->ycbcr_420_allowed = true;
- devm_drm_bridge_add(dev, bridge);
+ ret = devm_drm_bridge_add(dev, bridge);
+ if (ret)
+ return ERR_PTR(ret);
dp->aux.dev = dev;
dp->aux.drm_dev = encoder->dev;
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 276d05d25ad8..98d64ad791d0 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -1415,6 +1415,7 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev,
{
struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent);
struct device_node *np = pdata->dev->of_node;
+ const struct i2c_client *client = to_i2c_client(pdata->dev);
int ret;
pdata->next_bridge = devm_drm_of_get_bridge(&adev->dev, np, 1, 0);
@@ -1433,8 +1434,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev,
? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP;
if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) {
- pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT |
- DRM_BRIDGE_OP_HPD;
+ pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT;
+ if (client->irq)
+ pdata->bridge.ops |= DRM_BRIDGE_OP_HPD;
/*
* If comms were already enabled they would have been enabled
* with the wrong value of HPD_DISABLE. Update it now. Comms
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 262b1b8773c5..bb49b8361271 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -930,7 +930,8 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
mutex_unlock(&client->modeset_mutex);
out:
kfree(crtcs);
- modes_destroy(dev, modes, connector_count);
+ if (modes)
+ modes_destroy(dev, modes, connector_count);
kfree(modes);
kfree(offsets);
kfree(enabled);
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index 24180bfdf5a2..9ef9e52c0547 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -1338,14 +1338,14 @@ bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
/**
- * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked
+ * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked
* @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
*
- * This function determines if a GPU SVM range pages are valid. Expected be
- * called without holding gpusvm->notifier_lock.
+ * This function determines if a GPU SVM pages are valid. Expected be called
+ * without holding gpusvm->notifier_lock.
*
- * Return: True if GPU SVM range has valid pages, False otherwise
+ * Return: True if GPU SVM pages are valid, False otherwise
*/
static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_pages *svm_pages)
diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c
index 7ce8c674bb03..07ffee38974b 100644
--- a/drivers/gpu/drm/i915/display/intel_alpm.c
+++ b/drivers/gpu/drm/i915/display/intel_alpm.c
@@ -562,12 +562,7 @@ void intel_alpm_disable(struct intel_dp *intel_dp)
mutex_lock(&intel_dp->alpm.lock);
intel_de_rmw(display, ALPM_CTL(display, cpu_transcoder),
- ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE |
- ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0);
-
- intel_de_rmw(display,
- PORT_ALPM_CTL(cpu_transcoder),
- PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0);
+ ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE, 0);
drm_dbg_kms(display->drm, "Disabling ALPM\n");
mutex_unlock(&intel_dp->alpm.lock);
diff --git a/drivers/gpu/drm/imx/ipuv3/parallel-display.c b/drivers/gpu/drm/imx/ipuv3/parallel-display.c
index dea85577513a..4ce772bc3cb3 100644
--- a/drivers/gpu/drm/imx/ipuv3/parallel-display.c
+++ b/drivers/gpu/drm/imx/ipuv3/parallel-display.c
@@ -256,7 +256,9 @@ static int imx_pd_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, imxpd);
- devm_drm_bridge_add(dev, &imxpd->bridge);
+ ret = devm_drm_bridge_add(dev, &imxpd->bridge);
+ if (ret)
+ return ret;
return component_add(dev, &imx_pd_ops);
}
diff --git a/drivers/gpu/drm/logicvc/logicvc_drm.c b/drivers/gpu/drm/logicvc/logicvc_drm.c
index 204b0fee55d0..bbebf4fc7f51 100644
--- a/drivers/gpu/drm/logicvc/logicvc_drm.c
+++ b/drivers/gpu/drm/logicvc/logicvc_drm.c
@@ -92,7 +92,6 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc)
struct device *dev = drm_dev->dev;
struct device_node *of_node = dev->of_node;
struct logicvc_drm_config *config = &logicvc->config;
- struct device_node *layers_node;
int ret;
logicvc_of_property_parse_bool(of_node, LOGICVC_OF_PROPERTY_DITHERING,
@@ -128,7 +127,8 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc)
if (ret)
return ret;
- layers_node = of_get_child_by_name(of_node, "layers");
+ struct device_node *layers_node __free(device_node) =
+ of_get_child_by_name(of_node, "layers");
if (!layers_node) {
drm_err(drm_dev, "Missing non-optional layers node\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
index e10192fe1d0f..0121d5639471 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
@@ -737,8 +737,8 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
if (!obj)
goto done;
- if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
- WARN_ON(obj->buffer.length != 4))
+ if (obj->type != ACPI_TYPE_BUFFER ||
+ obj->buffer.length != 4)
goto done;
caps->status = 0;
@@ -773,8 +773,8 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
if (!obj)
goto done;
- if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
- WARN_ON(obj->buffer.length != 4))
+ if (obj->type != ACPI_TYPE_BUFFER ||
+ obj->buffer.length != 4)
goto done;
jt->status = 0;
@@ -861,8 +861,8 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod)
_DOD = output.pointer;
- if (WARN_ON(_DOD->type != ACPI_TYPE_PACKAGE) ||
- WARN_ON(_DOD->package.count > ARRAY_SIZE(dod->acpiIdList)))
+ if (_DOD->type != ACPI_TYPE_PACKAGE ||
+ _DOD->package.count > ARRAY_SIZE(dod->acpiIdList))
return;
for (int i = 0; i < _DOD->package.count; i++) {
diff --git a/drivers/gpu/drm/tiny/sharp-memory.c b/drivers/gpu/drm/tiny/sharp-memory.c
index 64272cd0f6e2..cbf69460ebf3 100644
--- a/drivers/gpu/drm/tiny/sharp-memory.c
+++ b/drivers/gpu/drm/tiny/sharp-memory.c
@@ -541,8 +541,8 @@ static int sharp_memory_probe(struct spi_device *spi)
smd = devm_drm_dev_alloc(dev, &sharp_memory_drm_driver,
struct sharp_memory_device, drm);
- if (!smd)
- return -ENOMEM;
+ if (IS_ERR(smd))
+ return PTR_ERR(smd);
spi_set_drvdata(spi, smd);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
index 2170b45c30e9..f45d93e8c1c8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -105,6 +105,7 @@ struct vmw_cmdbuf_context {
* @handle: DMA address handle for the command buffer space if @using_mob is
* false. Immutable.
* @size: The size of the command buffer space. Immutable.
+ * @id: Monotonically increasing ID of the last cmdbuf submitted.
* @num_contexts: Number of contexts actually enabled.
*/
struct vmw_cmdbuf_man {
@@ -132,6 +133,7 @@ struct vmw_cmdbuf_man {
bool has_pool;
dma_addr_t handle;
size_t size;
+ u64 id;
u32 num_contexts;
};
@@ -303,6 +305,8 @@ static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header)
struct vmw_cmdbuf_man *man = header->man;
u32 val;
+ header->cb_header->id = man->id++;
+
val = upper_32_bits(header->handle);
vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 3057f8baa7d2..e1f18020170a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1143,7 +1143,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo);
if (ret != 0) {
drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n");
- return PTR_ERR(vmw_bo);
+ return ret;
}
vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB);
ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
@@ -1199,7 +1199,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo);
if (ret != 0) {
drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n");
- return PTR_ERR(vmw_bo);
+ return ret;
}
vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM,
VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index fd4e76486f2d..45561bc1c9ef 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -260,6 +260,13 @@ out_no_dirty:
return ret;
}
+static void vmw_bo_dirty_free(struct kref *kref)
+{
+ struct vmw_bo_dirty *dirty = container_of(kref, struct vmw_bo_dirty, ref_count);
+
+ kvfree(dirty);
+}
+
/**
* vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
* @vbo: The buffer object
@@ -274,7 +281,7 @@ void vmw_bo_dirty_release(struct vmw_bo *vbo)
{
struct vmw_bo_dirty *dirty = vbo->dirty;
- if (dirty && kref_put(&dirty->ref_count, (void *)kvfree))
+ if (dirty && kref_put(&dirty->ref_count, vmw_bo_dirty_free))
vbo->dirty = NULL;
}
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 68172b0248a6..dc5a4fafa70c 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -96,6 +96,12 @@
#define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13)
#define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
+
+#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc)
+#define SELECTIVE_READ_ADDRESSING REG_BIT(30)
+#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23)
+#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16)
+
/*
* CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
* The lsb of each can be considered a separate enabling bit for encryption.
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 9d090d0f2438..df6d04704823 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
return ret;
}
+/* Dwords required to emit a RMW of a register */
+#define EMIT_RMW_DW 20
+
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
- struct xe_reg_sr *sr = &q->hwe->reg_lrc;
+ struct xe_hw_engine *hwe = q->hwe;
+ struct xe_reg_sr *sr = &hwe->reg_lrc;
struct xe_reg_sr_entry *entry;
- int count_rmw = 0, count = 0, ret;
+ int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
unsigned long idx;
struct xe_bb *bb;
size_t bb_len = 0;
@@ -224,6 +228,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
xa_for_each(&sr->xa, idx, entry) {
if (entry->reg.masked || entry->clr_bits == ~0)
++count;
+ else if (entry->reg.mcr)
+ ++count_rmw_mcr;
else
++count_rmw;
}
@@ -231,17 +237,35 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
if (count)
bb_len += count * 2 + 1;
- if (count_rmw)
- bb_len += count_rmw * 20 + 7;
+ /*
+ * RMW of MCR registers is the same as a normal RMW, except an
+ * additional LRI (3 dwords) is required per register to steer the read
+ * to a nom-terminated instance.
+ *
+ * We could probably shorten the batch slightly by eliding the
+ * steering for consecutive MCR registers that have the same
+ * group/instance target, but it's not worth the extra complexity to do
+ * so.
+ */
+ bb_len += count_rmw * EMIT_RMW_DW;
+ bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);
+
+ /*
+ * After doing all RMW, we need 7 trailing dwords to clean up,
+ * plus an additional 3 dwords to reset steering if any of the
+ * registers were MCR.
+ */
+ if (count_rmw || count_rmw_mcr)
+ bb_len += 7 + (count_rmw_mcr ? 3 : 0);
- if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
+ if (hwe->class == XE_ENGINE_CLASS_RENDER)
/*
* Big enough to emit all of the context's 3DSTATE via
* xe_lrc_emit_hwe_state_instructions()
*/
- bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
+ bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);
- xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
+ xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);
bb = xe_bb_new(gt, bb_len, false);
if (IS_ERR(bb))
@@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
}
}
- if (count_rmw) {
- /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
-
+ if (count_rmw || count_rmw_mcr) {
xa_for_each(&sr->xa, idx, entry) {
if (entry->reg.masked || entry->clr_bits == ~0)
continue;
+ if (entry->reg.mcr) {
+ struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
+ u8 group, instance;
+
+ xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
+ *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
+ *cs++ = SELECTIVE_READ_ADDRESSING |
+ REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
+ REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
+ }
+
*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
*cs++ = entry->reg.addr;
*cs++ = CS_GPR_REG(0, 0).addr;
@@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
*cs++ = CS_GPR_REG(0, 0).addr;
*cs++ = entry->reg.addr;
- xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
- entry->reg.addr, entry->clr_bits, entry->set_bits);
+ xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
+ entry->reg.addr, entry->clr_bits, entry->set_bits,
+ entry->reg.mcr ? " (MCR)" : "");
}
/* reset used GPR */
@@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
*cs++ = 0;
*cs++ = CS_GPR_REG(0, 2).addr;
*cs++ = 0;
+
+ /* reset steering */
+ if (count_rmw_mcr) {
+ *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
+ *cs++ = 0;
+ }
}
cs = xe_lrc_emit_hwe_state_instructions(q, cs);
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index eb136390dafd..24d6d9af20d6 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -146,8 +146,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
if (!signal) {
sync->fence = drm_syncobj_fence_get(sync->syncobj);
- if (XE_IOCTL_DBG(xe, !sync->fence))
- return -EINVAL;
+ if (XE_IOCTL_DBG(xe, !sync->fence)) {
+ err = -EINVAL;
+ goto free_sync;
+ }
}
break;
@@ -167,17 +169,21 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
if (signal) {
sync->chain_fence = dma_fence_chain_alloc();
- if (!sync->chain_fence)
- return -ENOMEM;
+ if (!sync->chain_fence) {
+ err = -ENOMEM;
+ goto free_sync;
+ }
} else {
sync->fence = drm_syncobj_fence_get(sync->syncobj);
- if (XE_IOCTL_DBG(xe, !sync->fence))
- return -EINVAL;
+ if (XE_IOCTL_DBG(xe, !sync->fence)) {
+ err = -EINVAL;
+ goto free_sync;
+ }
err = dma_fence_chain_find_seqno(&sync->fence,
sync_in.timeline_value);
if (err)
- return err;
+ goto free_sync;
}
break;
@@ -200,8 +206,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
return PTR_ERR(sync->ufence);
sync->ufence_chain_fence = dma_fence_chain_alloc();
- if (!sync->ufence_chain_fence)
- return -ENOMEM;
+ if (!sync->ufence_chain_fence) {
+ err = -ENOMEM;
+ goto free_sync;
+ }
sync->ufence_syncobj = ufence_syncobj;
}
@@ -216,6 +224,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
sync->timeline_value = sync_in.timeline_value;
return 0;
+
+free_sync:
+ xe_sync_entry_cleanup(sync);
+ return err;
}
ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO);
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 794b9778816b..78ac2ff5befd 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -6,6 +6,7 @@ menuconfig INFINIBAND
depends on INET
depends on m || IPV6 != m
depends on !ALPHA
+ select DMA_SHARED_BUFFER
select IRQ_POLL
select DIMLIB
help
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index b415d4aad04f..ee4a2bc68fb2 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -926,6 +926,13 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
if (err)
return err;
+ /*
+ * Mark the device as ready for GID cache updates. This allows netdev
+ * event handlers to update the GID cache even before the device is
+ * fully registered.
+ */
+ ib_device_enable_gid_updates(ib_dev);
+
rdma_roce_rescan_device(ib_dev);
return err;
@@ -1637,6 +1644,12 @@ void ib_cache_release_one(struct ib_device *device)
void ib_cache_cleanup_one(struct ib_device *device)
{
+ /*
+ * Clear the GID updates mark first to prevent event handlers from
+ * accessing the device while it's being torn down.
+ */
+ ib_device_disable_gid_updates(device);
+
/* The cleanup function waits for all in-progress workqueue
* elements and cleans up the GID cache. This function should be
* called after the device was removed from the devices list and
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e54c07c74575..9480d1a51c11 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2729,6 +2729,9 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
*to_destroy = NULL;
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return 0;
+ if (id_priv->restricted_node_type != RDMA_NODE_UNSPECIFIED &&
+ id_priv->restricted_node_type != cma_dev->device->node_type)
+ return 0;
dev_id_priv =
__rdma_create_id(net, cma_listen_handler, id_priv,
@@ -2736,6 +2739,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
if (IS_ERR(dev_id_priv))
return PTR_ERR(dev_id_priv);
+ dev_id_priv->restricted_node_type = id_priv->restricted_node_type;
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
@@ -4194,7 +4198,7 @@ int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type)
}
mutex_lock(&lock);
- if (id_priv->cma_dev)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_IDLE)
ret = -EALREADY;
else
id_priv->restricted_node_type = node_type;
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 05102769a918..a2c36666e6fc 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -100,6 +100,9 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
roce_netdev_callback cb,
void *cookie);
+void ib_device_enable_gid_updates(struct ib_device *device);
+void ib_device_disable_gid_updates(struct ib_device *device);
+
typedef int (*nldev_callback)(struct ib_device *device,
struct sk_buff *skb,
struct netlink_callback *cb,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 1b5f1ee0a557..558b73940d66 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -93,6 +93,7 @@ static struct workqueue_struct *ib_unreg_wq;
static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(devices_rwsem);
#define DEVICE_REGISTERED XA_MARK_1
+#define DEVICE_GID_UPDATES XA_MARK_2
static u32 highest_client_id;
#define CLIENT_REGISTERED XA_MARK_1
@@ -2412,11 +2413,42 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
unsigned long index;
down_read(&devices_rwsem);
- xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
+ xa_for_each_marked(&devices, index, dev, DEVICE_GID_UPDATES)
ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
up_read(&devices_rwsem);
}
+/**
+ * ib_device_enable_gid_updates - Mark device as ready for GID cache updates
+ * @device: Device to mark
+ *
+ * Called after GID table is allocated and initialized. After this mark is set,
+ * netdevice event handlers can update the device's GID cache. This allows
+ * events that arrive during device registration to be processed, avoiding
+ * stale GID entries when netdev properties change during the device
+ * registration process.
+ */
+void ib_device_enable_gid_updates(struct ib_device *device)
+{
+ down_write(&devices_rwsem);
+ xa_set_mark(&devices, device->index, DEVICE_GID_UPDATES);
+ up_write(&devices_rwsem);
+}
+
+/**
+ * ib_device_disable_gid_updates - Clear the GID updates mark
+ * @device: Device to unmark
+ *
+ * Called before GID table cleanup to prevent event handlers from accessing
+ * the device while it's being torn down.
+ */
+void ib_device_disable_gid_updates(struct ib_device *device)
+{
+ down_write(&devices_rwsem);
+ xa_clear_mark(&devices, device->index, DEVICE_GID_UPDATES);
+ up_write(&devices_rwsem);
+}
+
/*
* ib_enum_all_devs - enumerate all ib_devices
* @cb: Callback to call for each found ib_device
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index f5298c33e581..d30f24b90bca 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -218,13 +218,11 @@ ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
err = ib_umem_dmabuf_map_pages(umem_dmabuf);
if (err)
- goto err_unpin;
+ goto err_release;
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
return umem_dmabuf;
-err_unpin:
- dma_buf_unpin(umem_dmabuf->attach);
err_release:
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
ib_umem_release(&umem_dmabuf->umem);
diff --git a/drivers/infiniband/core/uverbs_std_types_dmabuf.c b/drivers/infiniband/core/uverbs_std_types_dmabuf.c
index dfdfcd1d1a44..4a7f8b6f9dc8 100644
--- a/drivers/infiniband/core/uverbs_std_types_dmabuf.c
+++ b/drivers/infiniband/core/uverbs_std_types_dmabuf.c
@@ -10,6 +10,8 @@
#include "rdma_core.h"
#include "uverbs.h"
+MODULE_IMPORT_NS("DMA_BUF");
+
static int uverbs_dmabuf_attach(struct dma_buf *dmabuf,
struct dma_buf_attachment *attachment)
{
diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c
index cf3bf08e5f26..d34b5f88cd40 100644
--- a/drivers/infiniband/hw/bng_re/bng_dev.c
+++ b/drivers/infiniband/hw/bng_re/bng_dev.c
@@ -54,9 +54,6 @@ static void bng_re_destroy_chip_ctx(struct bng_re_dev *rdev)
{
struct bng_re_chip_ctx *chip_ctx;
- if (!rdev->chip_ctx)
- return;
-
kfree(rdev->dev_attr);
rdev->dev_attr = NULL;
@@ -124,12 +121,6 @@ static int bng_re_net_ring_free(struct bng_re_dev *rdev,
struct bnge_fw_msg fw_msg = {};
int rc = -EINVAL;
- if (!rdev)
- return rc;
-
- if (!aux_dev)
- return rc;
-
bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
req.ring_type = type;
req.ring_id = cpu_to_le16(fw_ring_id);
@@ -150,10 +141,7 @@ static int bng_re_net_ring_alloc(struct bng_re_dev *rdev,
struct hwrm_ring_alloc_input req = {};
struct hwrm_ring_alloc_output resp;
struct bnge_fw_msg fw_msg = {};
- int rc = -EINVAL;
-
- if (!aux_dev)
- return rc;
+ int rc;
bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
req.enables = 0;
@@ -184,10 +172,7 @@ static int bng_re_stats_ctx_free(struct bng_re_dev *rdev)
struct hwrm_stat_ctx_free_input req = {};
struct hwrm_stat_ctx_free_output resp = {};
struct bnge_fw_msg fw_msg = {};
- int rc = -EINVAL;
-
- if (!aux_dev)
- return rc;
+ int rc;
bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
req.stat_ctx_id = cpu_to_le32(rdev->stats_ctx.fw_id);
@@ -208,13 +193,10 @@ static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev)
struct hwrm_stat_ctx_alloc_output resp = {};
struct hwrm_stat_ctx_alloc_input req = {};
struct bnge_fw_msg fw_msg = {};
- int rc = -EINVAL;
+ int rc;
stats->fw_id = BNGE_INVALID_STATS_CTX_ID;
- if (!aux_dev)
- return rc;
-
bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
req.update_period_ms = cpu_to_le32(1000);
req.stats_dma_addr = cpu_to_le64(stats->dma_map);
@@ -303,7 +285,7 @@ static int bng_re_dev_init(struct bng_re_dev *rdev)
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to register with netedev: %#x\n", rc);
- return -EINVAL;
+ goto reg_netdev_fail;
}
set_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
@@ -312,19 +294,16 @@ static int bng_re_dev_init(struct bng_re_dev *rdev)
ibdev_err(&rdev->ibdev,
"RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
rdev->aux_dev->auxr_info->msix_requested);
- bnge_unregister_dev(rdev->aux_dev);
- clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
- return -EINVAL;
+ rc = -EINVAL;
+ goto msix_ctx_fail;
}
ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
rdev->aux_dev->auxr_info->msix_requested);
rc = bng_re_setup_chip_ctx(rdev);
if (rc) {
- bnge_unregister_dev(rdev->aux_dev);
- clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
- return -EINVAL;
+ goto msix_ctx_fail;
}
bng_re_query_hwrm_version(rdev);
@@ -333,16 +312,14 @@ static int bng_re_dev_init(struct bng_re_dev *rdev)
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate RCFW Channel: %#x\n", rc);
- goto fail;
+ goto alloc_fw_chl_fail;
}
/* Allocate nq record memory */
rdev->nqr = kzalloc_obj(*rdev->nqr);
if (!rdev->nqr) {
- bng_re_destroy_chip_ctx(rdev);
- bnge_unregister_dev(rdev->aux_dev);
- clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto nq_alloc_fail;
}
rdev->nqr->num_msix = rdev->aux_dev->auxr_info->msix_requested;
@@ -411,9 +388,15 @@ disable_rcfw:
free_ring:
bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
free_rcfw:
+ kfree(rdev->nqr);
+nq_alloc_fail:
bng_re_free_rcfw_channel(&rdev->rcfw);
-fail:
- bng_re_dev_uninit(rdev);
+alloc_fw_chl_fail:
+ bng_re_destroy_chip_ctx(rdev);
+msix_ctx_fail:
+ bnge_unregister_dev(rdev->aux_dev);
+ clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+reg_netdev_fail:
return rc;
}
@@ -486,8 +469,7 @@ static void bng_re_remove(struct auxiliary_device *adev)
rdev = dev_info->rdev;
- if (rdev)
- bng_re_remove_device(rdev, adev);
+ bng_re_remove_device(rdev, adev);
kfree(dev_info);
}
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index b5b93b42e6c4..b0aa7c0238ed 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1661,7 +1661,7 @@ static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
struct efa_mr *mr;
if (udata && udata->inlen &&
- !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, udata not cleared\n");
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c
index 5b3f40bd98d8..4842931f5316 100644
--- a/drivers/infiniband/hw/ionic/ionic_controlpath.c
+++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c
@@ -1218,7 +1218,7 @@ int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
struct ionic_tbl_buf buf = {};
- struct ionic_cq_resp resp;
+ struct ionic_cq_resp resp = {};
struct ionic_cq_req req;
int udma_idx = 0, rc;
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c
index 164046d00e5d..bd4c73e530d0 100644
--- a/drivers/infiniband/hw/ionic/ionic_ibdev.c
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
@@ -81,6 +81,8 @@ static int ionic_query_port(struct ib_device *ibdev, u32 port,
return -EINVAL;
ndev = ib_device_get_netdev(ibdev, port);
+ if (!ndev)
+ return -ENODEV;
if (netif_running(ndev) && netif_carrier_ok(ndev)) {
attr->state = IB_PORT_ACTIVE;
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 15af53237217..7251cd7a2147 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -5212,7 +5212,7 @@ static int irdma_create_user_ah(struct ib_ah *ibah,
#define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd)
struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
- struct irdma_create_ah_resp uresp;
+ struct irdma_create_ah_resp uresp = {};
struct irdma_ah *parent_ah;
int err;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index ef0635064fba..5c182ae4fc2f 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -428,6 +428,8 @@ static int mthca_create_srq(struct ib_srq *ibsrq,
if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
mthca_free_srq(to_mdev(ibsrq->device), srq);
+ mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar,
+ context->db_tab, ucmd.db_index);
return -EFAULT;
}
@@ -436,6 +438,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq,
static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
+ mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
if (udata) {
struct mthca_ucontext *context =
rdma_udata_to_drv_context(
@@ -446,8 +449,6 @@ static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
context->db_tab, to_msrq(srq)->db_index);
}
-
- mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
return 0;
}
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 30783814f9d9..291d7668cc8d 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3474,6 +3474,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
int lpi_base;
int nr_lpis;
int nr_ites;
+ int id_bits;
int sz;
if (!its_alloc_device_table(its, dev_id))
@@ -3485,7 +3486,10 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
/*
* Even if the device wants a single LPI, the ITT must be
* sized as a power of two (and you need at least one bit...).
+ * Also honor the ITS's own EID limit.
*/
+ id_bits = FIELD_GET(GITS_TYPER_IDBITS, its->typer) + 1;
+ nvecs = min_t(unsigned int, nvecs, BIT(id_bits));
nr_ites = max(2, nvecs);
sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1);
sz = max(sz, ITS_ITT_ALIGN);
diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c
index e518e5dfede7..f3fce0b1e25d 100644
--- a/drivers/irqchip/irq-gic-v5-irs.c
+++ b/drivers/irqchip/irq-gic-v5-irs.c
@@ -699,7 +699,7 @@ static int __init gicv5_irs_init(struct gicv5_irs_chip_data *irs_data)
*/
if (list_empty(&irs_nodes)) {
idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR0);
- gicv5_global_data.virt_capable = !FIELD_GET(GICV5_IRS_IDR0_VIRT, idr);
+ gicv5_global_data.virt_capable = !!FIELD_GET(GICV5_IRS_IDR0_VIRT, idr);
idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1);
irs_setup_pri_bits(idr);
diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c
index a7e9c3885b09..d724fe843980 100644
--- a/drivers/irqchip/irq-ls-extirq.c
+++ b/drivers/irqchip/irq-ls-extirq.c
@@ -125,32 +125,45 @@ static const struct irq_domain_ops extirq_domain_ops = {
static int
ls_extirq_parse_map(struct ls_extirq_data *priv, struct device_node *node)
{
- struct of_imap_parser imap_parser;
- struct of_imap_item imap_item;
+ const __be32 *map;
+ u32 mapsize;
int ret;
- ret = of_imap_parser_init(&imap_parser, node, &imap_item);
- if (ret)
- return ret;
+ map = of_get_property(node, "interrupt-map", &mapsize);
+ if (!map)
+ return -ENOENT;
+ if (mapsize % sizeof(*map))
+ return -EINVAL;
+ mapsize /= sizeof(*map);
- for_each_of_imap_item(&imap_parser, &imap_item) {
+ while (mapsize) {
struct device_node *ipar;
- u32 hwirq;
- int i;
+ u32 hwirq, intsize, j;
- hwirq = imap_item.child_imap[0];
- if (hwirq >= MAXIRQ) {
- of_node_put(imap_item.parent_args.np);
+ if (mapsize < 3)
+ return -EINVAL;
+ hwirq = be32_to_cpup(map);
+ if (hwirq >= MAXIRQ)
return -EINVAL;
- }
priv->nirq = max(priv->nirq, hwirq + 1);
- ipar = of_node_get(imap_item.parent_args.np);
- priv->map[hwirq].fwnode = of_fwnode_handle(ipar);
+ ipar = of_find_node_by_phandle(be32_to_cpup(map + 2));
+ map += 3;
+ mapsize -= 3;
+ if (!ipar)
+ return -EINVAL;
+ priv->map[hwirq].fwnode = &ipar->fwnode;
+ ret = of_property_read_u32(ipar, "#interrupt-cells", &intsize);
+ if (ret)
+ return ret;
- priv->map[hwirq].param_count = imap_item.parent_args.args_count;
- for (i = 0; i < priv->map[hwirq].param_count; i++)
- priv->map[hwirq].param[i] = imap_item.parent_args.args[i];
+ if (intsize > mapsize)
+ return -EINVAL;
+
+ priv->map[hwirq].param_count = intsize;
+ for (j = 0; j < intsize; ++j)
+ priv->map[hwirq].param[j] = be32_to_cpup(map++);
+ mapsize -= intsize;
}
return 0;
}
@@ -177,8 +190,10 @@ static int ls_extirq_probe(struct platform_device *pdev)
return dev_err_probe(dev, -ENOMEM, "Failed to allocate memory\n");
priv->intpcr = devm_of_iomap(dev, node, 0, NULL);
- if (!priv->intpcr)
- return dev_err_probe(dev, -ENOMEM, "Cannot ioremap OF node %pOF\n", node);
+ if (IS_ERR(priv->intpcr)) {
+ return dev_err_probe(dev, PTR_ERR(priv->intpcr),
+ "Cannot ioremap OF node %pOF\n", node);
+ }
ret = ls_extirq_parse_map(priv, node);
if (ret)
diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c
index 09e640430208..8e210cb451be 100644
--- a/drivers/irqchip/irq-mmp.c
+++ b/drivers/irqchip/irq-mmp.c
@@ -136,7 +136,7 @@ static void icu_unmask_irq(struct irq_data *d)
}
}
-struct irq_chip icu_irq_chip = {
+static const struct irq_chip icu_irq_chip = {
.name = "icu_irq",
.irq_mask = icu_mask_irq,
.irq_mask_ack = icu_mask_ack_irq,
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 686d39254426..5b0dac104814 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -172,8 +172,13 @@ static void plic_irq_disable(struct irq_data *d)
static void plic_irq_eoi(struct irq_data *d)
{
struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+ u32 __iomem *reg;
+ bool enabled;
+
+ reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32);
+ enabled = readl(reg) & BIT(d->hwirq % 32);
- if (unlikely(irqd_irq_disabled(d))) {
+ if (unlikely(!enabled)) {
plic_toggle(handler, d->hwirq, 1);
writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
plic_toggle(handler, d->hwirq, 0);
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index 8bb8dd34c223..a2159b2bc176 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -228,6 +228,9 @@ static int handle_one_ule_extension( struct dvb_net_priv *p )
unsigned char hlen = (p->ule_sndu_type & 0x0700) >> 8;
unsigned char htype = p->ule_sndu_type & 0x00FF;
+ if (htype >= ARRAY_SIZE(ule_mandatory_ext_handlers))
+ return -1;
+
/* Discriminate mandatory and optional extension headers. */
if (hlen == 0) {
/* Mandatory extension header */
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index 4e3423a19bdf..ac069d0c42b2 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -36,6 +36,8 @@ struct dw_mci_rockchip_priv_data {
int default_sample_phase;
int num_phases;
bool internal_phase;
+ int sample_phase;
+ int drv_phase;
};
/*
@@ -573,9 +575,43 @@ static void dw_mci_rockchip_remove(struct platform_device *pdev)
dw_mci_pltfm_remove(pdev);
}
+static int dw_mci_rockchip_runtime_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct dw_mci *host = platform_get_drvdata(pdev);
+ struct dw_mci_rockchip_priv_data *priv = host->priv;
+
+ if (priv->internal_phase) {
+ priv->sample_phase = rockchip_mmc_get_phase(host, true);
+ priv->drv_phase = rockchip_mmc_get_phase(host, false);
+ }
+
+ return dw_mci_runtime_suspend(dev);
+}
+
+static int dw_mci_rockchip_runtime_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct dw_mci *host = platform_get_drvdata(pdev);
+ struct dw_mci_rockchip_priv_data *priv = host->priv;
+ int ret;
+
+ ret = dw_mci_runtime_resume(dev);
+ if (ret)
+ return ret;
+
+ if (priv->internal_phase) {
+ rockchip_mmc_set_phase(host, true, priv->sample_phase);
+ rockchip_mmc_set_phase(host, false, priv->drv_phase);
+ mci_writel(host, MISC_CON, MEM_CLK_AUTOGATE_ENABLE);
+ }
+
+ return ret;
+}
+
static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = {
SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
- RUNTIME_PM_OPS(dw_mci_runtime_suspend, dw_mci_runtime_resume, NULL)
+ RUNTIME_PM_OPS(dw_mci_rockchip_runtime_suspend, dw_mci_rockchip_runtime_resume, NULL)
};
static struct platform_driver dw_mci_rockchip_pltfm_driver = {
diff --git a/drivers/mmc/host/mmci_qcom_dml.c b/drivers/mmc/host/mmci_qcom_dml.c
index 3da6112fbe39..67371389cc33 100644
--- a/drivers/mmc/host/mmci_qcom_dml.c
+++ b/drivers/mmc/host/mmci_qcom_dml.c
@@ -109,6 +109,7 @@ static int of_get_dml_pipe_index(struct device_node *np, const char *name)
&dma_spec))
return -ENODEV;
+ of_node_put(dma_spec.np);
if (dma_spec.args_count)
return dma_spec.args[0];
diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
index c9442499876c..57e45951644e 100644
--- a/drivers/mmc/host/sdhci-brcmstb.c
+++ b/drivers/mmc/host/sdhci-brcmstb.c
@@ -116,7 +116,7 @@ static void sdhci_brcmstb_restore_regs(struct mmc_host *mmc, enum cfg_core_ver v
writel(sr->boot_main_ctl, priv->boot_regs + SDIO_BOOT_MAIN_CTL);
if (ver == SDIO_CFG_CORE_V1) {
- writel(sr->sd_pin_sel, cr + SDIO_CFG_SD_PIN_SEL);
+ writel(sr->sd_pin_sel, cr + SDIO_CFG_V1_SD_PIN_SEL);
return;
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 9af7dacae9b3..0ccf928eecde 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -324,7 +324,7 @@ static bool bond_sk_check(struct bonding *bond)
}
}
-bool bond_xdp_check(struct bonding *bond, int mode)
+bool __bond_xdp_check(int mode, int xmit_policy)
{
switch (mode) {
case BOND_MODE_ROUNDROBIN:
@@ -335,7 +335,7 @@ bool bond_xdp_check(struct bonding *bond, int mode)
/* vlan+srcmac is not supported with XDP as in most cases the 802.1q
* payload is not in the packet due to hardware offload.
*/
- if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC)
+ if (xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC)
return true;
fallthrough;
default:
@@ -343,6 +343,11 @@ bool bond_xdp_check(struct bonding *bond, int mode)
}
}
+bool bond_xdp_check(struct bonding *bond, int mode)
+{
+ return __bond_xdp_check(mode, bond->params.xmit_policy);
+}
+
/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index dcee384c2f06..7380cc4ee75a 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1575,6 +1575,8 @@ static int bond_option_fail_over_mac_set(struct bonding *bond,
static int bond_option_xmit_hash_policy_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
+ if (bond->xdp_prog && !__bond_xdp_check(BOND_MODE(bond), newval->value))
+ return -EOPNOTSUPP;
netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n",
newval->string, newval->value);
bond->params.xmit_policy = newval->value;
diff --git a/drivers/net/can/dummy_can.c b/drivers/net/can/dummy_can.c
index 41953655e3d3..cd23de488edc 100644
--- a/drivers/net/can/dummy_can.c
+++ b/drivers/net/can/dummy_can.c
@@ -241,6 +241,7 @@ static int __init dummy_can_init(void)
dev->netdev_ops = &dummy_can_netdev_ops;
dev->ethtool_ops = &dummy_can_ethtool_ops;
+ dev->flags |= IFF_ECHO; /* enable echo handling */
priv = netdev_priv(dev);
priv->can.bittiming_const = &dummy_can_bittiming_const;
priv->can.bitrate_max = 20 * MEGA /* BPS */;
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index fa97adf25b73..bb7782582f40 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -1214,6 +1214,7 @@ static int mcp251x_open(struct net_device *net)
{
struct mcp251x_priv *priv = netdev_priv(net);
struct spi_device *spi = priv->spi;
+ bool release_irq = false;
unsigned long flags = 0;
int ret;
@@ -1257,12 +1258,24 @@ static int mcp251x_open(struct net_device *net)
return 0;
out_free_irq:
- free_irq(spi->irq, priv);
+ /* The IRQ handler might be running, and if so it will be waiting
+ * for the lock. But free_irq() must wait for the handler to finish
+ * so calling it here would deadlock.
+ *
+ * Setting priv->force_quit will let the handler exit right away
+ * without any access to the hardware. This make it safe to call
+ * free_irq() after the lock is released.
+ */
+ priv->force_quit = 1;
+ release_irq = true;
+
mcp251x_hw_sleep(spi);
out_close:
mcp251x_power_enable(priv->transceiver, 0);
close_candev(net);
mutex_unlock(&priv->mcp_lock);
+ if (release_irq)
+ free_irq(spi->irq, priv);
return ret;
}
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 4c219a5b139b..9b25dda7c183 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -445,6 +445,11 @@ static void ems_usb_read_bulk_callback(struct urb *urb)
start = CPC_HEADER_SIZE;
while (msg_count) {
+ if (start + CPC_MSG_HEADER_LEN > urb->actual_length) {
+ netdev_err(netdev, "format error\n");
+ break;
+ }
+
msg = (struct ems_cpc_msg *)&ibuf[start];
switch (msg->type) {
@@ -474,7 +479,7 @@ static void ems_usb_read_bulk_callback(struct urb *urb)
start += CPC_MSG_HEADER_LEN + msg->length;
msg_count--;
- if (start > urb->transfer_buffer_length) {
+ if (start > urb->actual_length) {
netdev_err(netdev, "format error\n");
break;
}
diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c
index 2892a68f510a..d257440fa01f 100644
--- a/drivers/net/can/usb/esd_usb.c
+++ b/drivers/net/can/usb/esd_usb.c
@@ -272,6 +272,9 @@ struct esd_usb {
struct usb_anchor rx_submitted;
+ unsigned int rx_pipe;
+ unsigned int tx_pipe;
+
int net_count;
u32 version;
int rxinitdone;
@@ -537,7 +540,7 @@ static void esd_usb_read_bulk_callback(struct urb *urb)
}
resubmit_urb:
- usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1),
+ usb_fill_bulk_urb(urb, dev->udev, dev->rx_pipe,
urb->transfer_buffer, ESD_USB_RX_BUFFER_SIZE,
esd_usb_read_bulk_callback, dev);
@@ -626,9 +629,7 @@ static int esd_usb_send_msg(struct esd_usb *dev, union esd_usb_msg *msg)
{
int actual_length;
- return usb_bulk_msg(dev->udev,
- usb_sndbulkpipe(dev->udev, 2),
- msg,
+ return usb_bulk_msg(dev->udev, dev->tx_pipe, msg,
msg->hdr.len * sizeof(u32), /* convert to # of bytes */
&actual_length,
1000);
@@ -639,12 +640,8 @@ static int esd_usb_wait_msg(struct esd_usb *dev,
{
int actual_length;
- return usb_bulk_msg(dev->udev,
- usb_rcvbulkpipe(dev->udev, 1),
- msg,
- sizeof(*msg),
- &actual_length,
- 1000);
+ return usb_bulk_msg(dev->udev, dev->rx_pipe, msg,
+ sizeof(*msg), &actual_length, 1000);
}
static int esd_usb_setup_rx_urbs(struct esd_usb *dev)
@@ -677,8 +674,7 @@ static int esd_usb_setup_rx_urbs(struct esd_usb *dev)
urb->transfer_dma = buf_dma;
- usb_fill_bulk_urb(urb, dev->udev,
- usb_rcvbulkpipe(dev->udev, 1),
+ usb_fill_bulk_urb(urb, dev->udev, dev->rx_pipe,
buf, ESD_USB_RX_BUFFER_SIZE,
esd_usb_read_bulk_callback, dev);
urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
@@ -903,7 +899,7 @@ static netdev_tx_t esd_usb_start_xmit(struct sk_buff *skb,
/* hnd must not be 0 - MSB is stripped in txdone handling */
msg->tx.hnd = BIT(31) | i; /* returned in TX done message */
- usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf,
+ usb_fill_bulk_urb(urb, dev->udev, dev->tx_pipe, buf,
msg->hdr.len * sizeof(u32), /* convert to # of bytes */
esd_usb_write_bulk_callback, context);
@@ -1298,10 +1294,16 @@ done:
static int esd_usb_probe(struct usb_interface *intf,
const struct usb_device_id *id)
{
+ struct usb_endpoint_descriptor *ep_in, *ep_out;
struct esd_usb *dev;
union esd_usb_msg *msg;
int i, err;
+ err = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out,
+ NULL, NULL);
+ if (err)
+ return err;
+
dev = kzalloc_obj(*dev);
if (!dev) {
err = -ENOMEM;
@@ -1309,6 +1311,8 @@ static int esd_usb_probe(struct usb_interface *intf,
}
dev->udev = interface_to_usbdev(intf);
+ dev->rx_pipe = usb_rcvbulkpipe(dev->udev, ep_in->bEndpointAddress);
+ dev->tx_pipe = usb_sndbulkpipe(dev->udev, ep_out->bEndpointAddress);
init_usb_anchor(&dev->rx_submitted);
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index 2d248deb69dc..b259f6109808 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -1461,12 +1461,18 @@ static void es58x_read_bulk_callback(struct urb *urb)
}
resubmit_urb:
+ usb_anchor_urb(urb, &es58x_dev->rx_urbs);
ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (!ret)
+ return;
+
+ usb_unanchor_urb(urb);
+
if (ret == -ENODEV) {
for (i = 0; i < es58x_dev->num_can_ch; i++)
if (es58x_dev->netdev[i])
netif_device_detach(es58x_dev->netdev[i]);
- } else if (ret)
+ } else
dev_err_ratelimited(dev,
"Failed resubmitting read bulk urb: %pe\n",
ERR_PTR(ret));
diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c
index 76578063ac82..f12318268e46 100644
--- a/drivers/net/can/usb/f81604.c
+++ b/drivers/net/can/usb/f81604.c
@@ -413,6 +413,7 @@ static void f81604_read_bulk_callback(struct urb *urb)
{
struct f81604_can_frame *frame = urb->transfer_buffer;
struct net_device *netdev = urb->context;
+ struct f81604_port_priv *priv = netdev_priv(netdev);
int ret;
if (!netif_device_present(netdev))
@@ -445,10 +446,15 @@ static void f81604_read_bulk_callback(struct urb *urb)
f81604_process_rx_packet(netdev, frame);
resubmit_urb:
+ usb_anchor_urb(urb, &priv->urbs_anchor);
ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (!ret)
+ return;
+ usb_unanchor_urb(urb);
+
if (ret == -ENODEV)
netif_device_detach(netdev);
- else if (ret)
+ else
netdev_err(netdev,
"%s: failed to resubmit read bulk urb: %pe\n",
__func__, ERR_PTR(ret));
@@ -620,6 +626,12 @@ static void f81604_read_int_callback(struct urb *urb)
netdev_info(netdev, "%s: Int URB aborted: %pe\n", __func__,
ERR_PTR(urb->status));
+ if (urb->actual_length < sizeof(*data)) {
+ netdev_warn(netdev, "%s: short int URB: %u < %zu\n",
+ __func__, urb->actual_length, sizeof(*data));
+ goto resubmit_urb;
+ }
+
switch (urb->status) {
case 0: /* success */
break;
@@ -646,10 +658,15 @@ static void f81604_read_int_callback(struct urb *urb)
f81604_handle_tx(priv, data);
resubmit_urb:
+ usb_anchor_urb(urb, &priv->urbs_anchor);
ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (!ret)
+ return;
+ usb_unanchor_urb(urb);
+
if (ret == -ENODEV)
netif_device_detach(netdev);
- else if (ret)
+ else
netdev_err(netdev, "%s: failed to resubmit int urb: %pe\n",
__func__, ERR_PTR(ret));
}
@@ -874,9 +891,27 @@ static void f81604_write_bulk_callback(struct urb *urb)
if (!netif_device_present(netdev))
return;
- if (urb->status)
- netdev_info(netdev, "%s: Tx URB error: %pe\n", __func__,
- ERR_PTR(urb->status));
+ if (!urb->status)
+ return;
+
+ switch (urb->status) {
+ case -ENOENT:
+ case -ECONNRESET:
+ case -ESHUTDOWN:
+ return;
+ default:
+ break;
+ }
+
+ if (net_ratelimit())
+ netdev_err(netdev, "%s: Tx URB error: %pe\n", __func__,
+ ERR_PTR(urb->status));
+
+ can_free_echo_skb(netdev, 0, NULL);
+ netdev->stats.tx_dropped++;
+ netdev->stats.tx_errors++;
+
+ netif_wake_queue(netdev);
}
static void f81604_clear_reg_work(struct work_struct *work)
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 9d27d6f0c0b5..ec9a7cbbbc69 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -772,9 +772,8 @@ device_detach:
}
}
-static int gs_usb_set_bittiming(struct net_device *netdev)
+static int gs_usb_set_bittiming(struct gs_can *dev)
{
- struct gs_can *dev = netdev_priv(netdev);
struct can_bittiming *bt = &dev->can.bittiming;
struct gs_device_bittiming dbt = {
.prop_seg = cpu_to_le32(bt->prop_seg),
@@ -791,9 +790,8 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
GFP_KERNEL);
}
-static int gs_usb_set_data_bittiming(struct net_device *netdev)
+static int gs_usb_set_data_bittiming(struct gs_can *dev)
{
- struct gs_can *dev = netdev_priv(netdev);
struct can_bittiming *bt = &dev->can.fd.data_bittiming;
struct gs_device_bittiming dbt = {
.prop_seg = cpu_to_le32(bt->prop_seg),
@@ -1057,6 +1055,20 @@ static int gs_can_open(struct net_device *netdev)
if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
flags |= GS_CAN_MODE_HW_TIMESTAMP;
+ rc = gs_usb_set_bittiming(dev);
+ if (rc) {
+ netdev_err(netdev, "failed to set bittiming: %pe\n", ERR_PTR(rc));
+ goto out_usb_kill_anchored_urbs;
+ }
+
+ if (ctrlmode & CAN_CTRLMODE_FD) {
+ rc = gs_usb_set_data_bittiming(dev);
+ if (rc) {
+ netdev_err(netdev, "failed to set data bittiming: %pe\n", ERR_PTR(rc));
+ goto out_usb_kill_anchored_urbs;
+ }
+ }
+
/* finally start device */
dev->can.state = CAN_STATE_ERROR_ACTIVE;
dm.flags = cpu_to_le32(flags);
@@ -1370,7 +1382,6 @@ static struct gs_can *gs_make_candev(unsigned int channel,
dev->can.state = CAN_STATE_STOPPED;
dev->can.clock.freq = le32_to_cpu(bt_const.fclk_can);
dev->can.bittiming_const = &dev->bt_const;
- dev->can.do_set_bittiming = gs_usb_set_bittiming;
dev->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC;
@@ -1394,7 +1405,6 @@ static struct gs_can *gs_make_candev(unsigned int channel,
* GS_CAN_FEATURE_BT_CONST_EXT is set.
*/
dev->can.fd.data_bittiming_const = &dev->bt_const;
- dev->can.fd.do_set_data_bittiming = gs_usb_set_data_bittiming;
}
if (feature & GS_CAN_FEATURE_TERMINATION) {
diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
index c79508b1c43e..0ea0ac75e42f 100644
--- a/drivers/net/can/usb/ucan.c
+++ b/drivers/net/can/usb/ucan.c
@@ -748,7 +748,7 @@ static void ucan_read_bulk_callback(struct urb *urb)
len = le16_to_cpu(m->len);
/* check sanity (length of content) */
- if (urb->actual_length - pos < len) {
+ if ((len == 0) || (urb->actual_length - pos < len)) {
netdev_warn(up->netdev,
"invalid message (short; no data; l:%d)\n",
urb->actual_length);
diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c
index c575e164368c..f938a3f701cc 100644
--- a/drivers/net/dsa/realtek/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -769,7 +769,7 @@ static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy,
out:
rtl83xx_unlock(priv);
- return 0;
+ return ret;
}
static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 57cd8d2e1ad7..c17900a49595 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -431,7 +431,7 @@
#define MAC_SSIR_SSINC_INDEX 16
#define MAC_SSIR_SSINC_WIDTH 8
#define MAC_TCR_SS_INDEX 29
-#define MAC_TCR_SS_WIDTH 2
+#define MAC_TCR_SS_WIDTH 3
#define MAC_TCR_TE_INDEX 0
#define MAC_TCR_TE_WIDTH 1
#define MAC_TCR_VNE_INDEX 24
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 62bb4b8a68e1..8b79d88480db 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1120,7 +1120,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
- unsigned long flags;
DBGPR("-->xgbe_powerdown\n");
@@ -1131,8 +1130,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller)
return -EINVAL;
}
- spin_lock_irqsave(&pdata->lock, flags);
-
if (caller == XGMAC_DRIVER_CONTEXT)
netif_device_detach(netdev);
@@ -1148,8 +1145,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller)
pdata->power_down = 1;
- spin_unlock_irqrestore(&pdata->lock, flags);
-
DBGPR("<--xgbe_powerdown\n");
return 0;
@@ -1159,7 +1154,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
- unsigned long flags;
DBGPR("-->xgbe_powerup\n");
@@ -1170,8 +1164,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller)
return -EINVAL;
}
- spin_lock_irqsave(&pdata->lock, flags);
-
pdata->power_down = 0;
xgbe_napi_enable(pdata, 0);
@@ -1186,8 +1178,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller)
xgbe_start_timers(pdata);
- spin_unlock_irqrestore(&pdata->lock, flags);
-
DBGPR("<--xgbe_powerup\n");
return 0;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index d1f0419edb23..7d45ea22a02e 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -76,7 +76,6 @@ struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev)
pdata->netdev = netdev;
pdata->dev = dev;
- spin_lock_init(&pdata->lock);
spin_lock_init(&pdata->xpcs_lock);
mutex_init(&pdata->rss_mutex);
spin_lock_init(&pdata->tstamp_lock);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index f1b2954017e6..4333d269ee84 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1027,9 +1027,6 @@ struct xgbe_prv_data {
unsigned int pp3;
unsigned int pp4;
- /* Overall device lock */
- spinlock_t lock;
-
/* XPCS indirect addressing lock */
spinlock_t xpcs_lock;
unsigned int xpcs_window_def_reg;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index fc0e0f36186e..52c1cb9cb7e0 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1533,7 +1533,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
if_id = (status & 0xFFFF0000) >> 16;
if (if_id >= ethsw->sw_attr.num_ifs) {
dev_err(dev, "Invalid if_id %d in IRQ status\n", if_id);
- goto out;
+ goto out_clear;
}
port_priv = ethsw->ports[if_id];
@@ -1553,6 +1553,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
dpaa2_switch_port_connect_mac(port_priv);
}
+out_clear:
err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
DPSW_IRQ_INDEX_IF, status);
if (err)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 70768392912c..a146ceaf2ed6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -3467,7 +3467,7 @@ static int enetc_int_vector_init(struct enetc_ndev_priv *priv, int i,
priv->rx_ring[i] = bdr;
err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0,
- ENETC_RXB_DMA_SIZE_XDP);
+ ENETC_RXB_TRUESIZE);
if (err)
goto free_vector;
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index ba331899d186..d4a1041e456d 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -33,6 +33,7 @@
/* Extended Device Control */
#define E1000_CTRL_EXT_LPCD 0x00000004 /* LCD Power Cycle Done */
+#define E1000_CTRL_EXT_DPG_EN 0x00000008 /* Dynamic Power Gating Enable */
#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Definable Pin 3 */
#define E1000_CTRL_EXT_FORCE_SMBUS 0x00000800 /* Force SMBus mode */
#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index aa08f397988e..63ebe00376f5 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -117,7 +117,8 @@ enum e1000_boards {
board_pch_cnp,
board_pch_tgp,
board_pch_adp,
- board_pch_mtp
+ board_pch_mtp,
+ board_pch_ptp
};
struct e1000_ps_page {
@@ -527,6 +528,7 @@ extern const struct e1000_info e1000_pch_cnp_info;
extern const struct e1000_info e1000_pch_tgp_info;
extern const struct e1000_info e1000_pch_adp_info;
extern const struct e1000_info e1000_pch_mtp_info;
+extern const struct e1000_info e1000_pch_ptp_info;
extern const struct e1000_info e1000_es2_info;
void e1000e_ptp_init(struct e1000_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index fc8ed38aa095..c7ac599e5a7a 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -118,8 +118,6 @@ struct e1000_hw;
#define E1000_DEV_ID_PCH_ARL_I219_V24 0x57A1
#define E1000_DEV_ID_PCH_PTP_I219_LM25 0x57B3
#define E1000_DEV_ID_PCH_PTP_I219_V25 0x57B4
-#define E1000_DEV_ID_PCH_PTP_I219_LM26 0x57B5
-#define E1000_DEV_ID_PCH_PTP_I219_V26 0x57B6
#define E1000_DEV_ID_PCH_PTP_I219_LM27 0x57B7
#define E1000_DEV_ID_PCH_PTP_I219_V27 0x57B8
#define E1000_DEV_ID_PCH_NVL_I219_LM29 0x57B9
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 0ff8688ac3b8..dea208db1be5 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -528,7 +528,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
phy->id = e1000_phy_unknown;
- if (hw->mac.type == e1000_pch_mtp) {
+ if (hw->mac.type == e1000_pch_mtp || hw->mac.type == e1000_pch_ptp) {
phy->retry_count = 2;
e1000e_enable_phy_retry(hw);
}
@@ -4932,6 +4932,15 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
reg |= E1000_KABGTXD_BGSQLBIAS;
ew32(KABGTXD, reg);
+ /* The hardware reset value of the DPG_EN bit is 1.
+ * Clear DPG_EN to prevent unexpected autonomous power gating.
+ */
+ if (hw->mac.type >= e1000_pch_ptp) {
+ reg = er32(CTRL_EXT);
+ reg &= ~E1000_CTRL_EXT_DPG_EN;
+ ew32(CTRL_EXT, reg);
+ }
+
return 0;
}
@@ -6208,3 +6217,23 @@ const struct e1000_info e1000_pch_mtp_info = {
.phy_ops = &ich8_phy_ops,
.nvm_ops = &spt_nvm_ops,
};
+
+const struct e1000_info e1000_pch_ptp_info = {
+ .mac = e1000_pch_ptp,
+ .flags = FLAG_IS_ICH
+ | FLAG_HAS_WOL
+ | FLAG_HAS_HW_TIMESTAMP
+ | FLAG_HAS_CTRLEXT_ON_LOAD
+ | FLAG_HAS_AMT
+ | FLAG_HAS_FLASH
+ | FLAG_HAS_JUMBO_FRAMES
+ | FLAG_APME_IN_WUC,
+ .flags2 = FLAG2_HAS_PHY_STATS
+ | FLAG2_HAS_EEE,
+ .pba = 26,
+ .max_hw_frame_size = 9022,
+ .get_variants = e1000_get_variants_ich8lan,
+ .mac_ops = &ich8_mac_ops,
+ .phy_ops = &ich8_phy_ops,
+ .nvm_ops = &spt_nvm_ops,
+};
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index fd4b117ed20f..35dab1e3132f 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -55,6 +55,7 @@ static const struct e1000_info *e1000_info_tbl[] = {
[board_pch_tgp] = &e1000_pch_tgp_info,
[board_pch_adp] = &e1000_pch_adp_info,
[board_pch_mtp] = &e1000_pch_mtp_info,
+ [board_pch_ptp] = &e1000_pch_ptp_info,
};
struct e1000_reg_info {
@@ -7922,14 +7923,12 @@ static const struct pci_device_id e1000_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_mtp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_LM24), board_pch_mtp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_V24), board_pch_mtp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_mtp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_mtp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM26), board_pch_mtp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V26), board_pch_mtp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_mtp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_mtp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_mtp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_mtp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_ptp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_ptp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_ptp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_ptp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_ptp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_ptp },
{ 0, 0, 0, 0, 0, 0, 0 } /* terminate list */
};
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 7b9e147d7365..926d001b2150 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3569,6 +3569,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
u16 pf_q = vsi->base_queue + ring->queue_index;
struct i40e_hw *hw = &vsi->back->hw;
struct i40e_hmc_obj_rxq rx_ctx;
+ u32 xdp_frame_sz;
int err = 0;
bool ok;
@@ -3578,49 +3579,47 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
memset(&rx_ctx, 0, sizeof(rx_ctx));
ring->rx_buf_len = vsi->rx_buf_len;
+ xdp_frame_sz = i40e_rx_pg_size(ring) / 2;
/* XDP RX-queue info only needed for RX rings exposed to XDP */
if (ring->vsi->type != I40E_VSI_MAIN)
goto skip;
- if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
- err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
- ring->queue_index,
- ring->q_vector->napi.napi_id,
- ring->rx_buf_len);
- if (err)
- return err;
- }
-
ring->xsk_pool = i40e_xsk_pool(ring);
if (ring->xsk_pool) {
- xdp_rxq_info_unreg(&ring->xdp_rxq);
+ xdp_frame_sz = xsk_pool_get_rx_frag_step(ring->xsk_pool);
ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->queue_index,
ring->q_vector->napi.napi_id,
- ring->rx_buf_len);
+ xdp_frame_sz);
if (err)
return err;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL,
NULL);
if (err)
- return err;
+ goto unreg_xdp;
dev_info(&vsi->back->pdev->dev,
"Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->queue_index);
} else {
+ err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->queue_index,
+ ring->q_vector->napi.napi_id,
+ xdp_frame_sz);
+ if (err)
+ return err;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
NULL);
if (err)
- return err;
+ goto unreg_xdp;
}
skip:
- xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
+ xdp_init_buff(&ring->xdp, xdp_frame_sz, &ring->xdp_rxq);
rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -3654,7 +3653,8 @@ skip:
dev_info(&vsi->back->pdev->dev,
"Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
ring->queue_index, pf_q, err);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto unreg_xdp;
}
/* set the context in the HMC */
@@ -3663,7 +3663,8 @@ skip:
dev_info(&vsi->back->pdev->dev,
"Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
ring->queue_index, pf_q, err);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto unreg_xdp;
}
/* configure Rx buffer alignment */
@@ -3671,7 +3672,8 @@ skip:
if (I40E_2K_TOO_SMALL_WITH_PADDING) {
dev_info(&vsi->back->pdev->dev,
"2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto unreg_xdp;
}
clear_ring_build_skb_enabled(ring);
} else {
@@ -3701,6 +3703,11 @@ skip:
}
return 0;
+unreg_xdp:
+ if (ring->vsi->type == I40E_VSI_MAIN)
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
+
+ return err;
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h
index 759f3d1c4c8f..dde0ccd789ed 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -88,7 +88,7 @@ TRACE_EVENT(i40e_napi_poll,
__entry->rx_clean_complete = rx_clean_complete;
__entry->tx_clean_complete = tx_clean_complete;
__entry->irq_num = q->irq_num;
- __entry->curr_cpu = get_cpu();
+ __entry->curr_cpu = smp_processor_id();
__assign_str(qname);
__assign_str(dev_name);
__assign_bitmask(irq_affinity, cpumask_bits(&q->affinity_mask),
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 34db7d8866b0..894f2d06d39d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1470,6 +1470,9 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (!rx_ring->rx_bi)
return;
+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+
if (rx_ring->xsk_pool) {
i40e_xsk_clean_rx_ring(rx_ring);
goto skip_free;
@@ -1527,8 +1530,6 @@ skip_free:
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
i40e_clean_rx_ring(rx_ring);
- if (rx_ring->vsi->type == I40E_VSI_MAIN)
- xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
rx_ring->xdp_prog = NULL;
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index bceaf4b1b85d..86c1964f42e1 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -2793,7 +2793,22 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
netdev->watchdog_timeo = 5 * HZ;
netdev->min_mtu = ETH_MIN_MTU;
- netdev->max_mtu = LIBIE_MAX_MTU;
+
+ /* PF/VF API: vf_res->max_mtu is max frame size (not MTU).
+ * Convert to MTU.
+ */
+ if (!adapter->vf_res->max_mtu) {
+ netdev->max_mtu = LIBIE_MAX_MTU;
+ } else if (adapter->vf_res->max_mtu < LIBETH_RX_LL_LEN + ETH_MIN_MTU ||
+ adapter->vf_res->max_mtu >
+ LIBETH_RX_LL_LEN + LIBIE_MAX_MTU) {
+ netdev_warn_once(adapter->netdev,
+ "invalid max frame size %d from PF, using default MTU %d",
+ adapter->vf_res->max_mtu, LIBIE_MAX_MTU);
+ netdev->max_mtu = LIBIE_MAX_MTU;
+ } else {
+ netdev->max_mtu = adapter->vf_res->max_mtu - LIBETH_RX_LL_LEN;
+ }
if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index def7efa15447..2b2b22af42be 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -987,6 +987,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
int ice_plug_aux_dev(struct ice_pf *pf);
void ice_unplug_aux_dev(struct ice_pf *pf);
+void ice_rdma_finalize_setup(struct ice_pf *pf);
int ice_init_rdma(struct ice_pf *pf);
void ice_deinit_rdma(struct ice_pf *pf);
bool ice_is_wol_supported(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index eb77dfa934aa..1667f686ff75 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -124,6 +124,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
if (vsi->type == ICE_VSI_VF) {
ice_calc_vf_reg_idx(vsi->vf, q_vector);
goto out;
+ } else if (vsi->type == ICE_VSI_LB) {
+ goto skip_alloc;
} else if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi);
@@ -659,33 +661,22 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
{
struct device *dev = ice_pf_to_dev(ring->vsi->back);
u32 num_bufs = ICE_DESC_UNUSED(ring);
- u32 rx_buf_len;
int err;
- if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) {
- if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
- err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
- ring->q_index,
- ring->q_vector->napi.napi_id,
- ring->rx_buf_len);
- if (err)
- return err;
- }
-
+ if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF ||
+ ring->vsi->type == ICE_VSI_LB) {
ice_rx_xsk_pool(ring);
err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool);
if (err)
return err;
if (ring->xsk_pool) {
- xdp_rxq_info_unreg(&ring->xdp_rxq);
-
- rx_buf_len =
- xsk_pool_get_rx_frame_size(ring->xsk_pool);
+ u32 frag_size =
+ xsk_pool_get_rx_frag_step(ring->xsk_pool);
err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index,
ring->q_vector->napi.napi_id,
- rx_buf_len);
+ frag_size);
if (err)
return err;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
@@ -702,14 +693,13 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
if (err)
return err;
- if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
- err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
- ring->q_index,
- ring->q_vector->napi.napi_id,
- ring->rx_buf_len);
- if (err)
- goto err_destroy_fq;
- }
+ err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index,
+ ring->q_vector->napi.napi_id,
+ ring->truesize);
+ if (err)
+ goto err_destroy_fq;
+
xdp_rxq_info_attach_page_pool(&ring->xdp_rxq,
ring->pp);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 6cd63190f55d..ce11fea122d0 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1816,6 +1816,7 @@ static bool ice_should_retry_sq_send_cmd(u16 opcode)
case ice_aqc_opc_lldp_stop:
case ice_aqc_opc_lldp_start:
case ice_aqc_opc_lldp_filter_ctrl:
+ case ice_aqc_opc_sff_eeprom:
return true;
}
@@ -1841,6 +1842,7 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq,
{
struct libie_aq_desc desc_cpy;
bool is_cmd_for_retry;
+ u8 *buf_cpy = NULL;
u8 idx = 0;
u16 opcode;
int status;
@@ -1850,8 +1852,11 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq,
memset(&desc_cpy, 0, sizeof(desc_cpy));
if (is_cmd_for_retry) {
- /* All retryable cmds are direct, without buf. */
- WARN_ON(buf);
+ if (buf) {
+ buf_cpy = kmemdup(buf, buf_size, GFP_KERNEL);
+ if (!buf_cpy)
+ return -ENOMEM;
+ }
memcpy(&desc_cpy, desc, sizeof(desc_cpy));
}
@@ -1863,12 +1868,14 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq,
hw->adminq.sq_last_status != LIBIE_AQ_RC_EBUSY)
break;
+ if (buf_cpy)
+ memcpy(buf, buf_cpy, buf_size);
memcpy(desc, &desc_cpy, sizeof(desc_cpy));
-
msleep(ICE_SQ_SEND_DELAY_TIME_MS);
} while (++idx < ICE_SQ_SEND_MAX_EXECUTE);
+ kfree(buf_cpy);
return status;
}
@@ -6391,7 +6398,7 @@ int ice_lldp_fltr_add_remove(struct ice_hw *hw, struct ice_vsi *vsi, bool add)
struct ice_aqc_lldp_filter_ctrl *cmd;
struct libie_aq_desc desc;
- if (vsi->type != ICE_VSI_PF || !ice_fw_supports_lldp_fltr_ctrl(hw))
+ if (!ice_fw_supports_lldp_fltr_ctrl(hw))
return -EOPNOTSUPP;
cmd = libie_aq_raw(&desc);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 0ea64b330614..301947d53ede 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1289,6 +1289,10 @@ static u64 ice_loopback_test(struct net_device *netdev)
test_vsi->netdev = netdev;
tx_ring = test_vsi->tx_rings[0];
rx_ring = test_vsi->rx_rings[0];
+ /* Dummy q_vector and napi. Fill the minimum required for
+ * ice_rxq_pp_create().
+ */
+ rx_ring->q_vector->napi.dev = netdev;
if (ice_lbtest_prepare_rings(test_vsi)) {
ret = 2;
@@ -3328,7 +3332,7 @@ process_rx:
rx_rings = kzalloc_objs(*rx_rings, vsi->num_rxq);
if (!rx_rings) {
err = -ENOMEM;
- goto done;
+ goto free_xdp;
}
ice_for_each_rxq(vsi, i) {
@@ -3338,6 +3342,7 @@ process_rx:
rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
rx_rings[i].desc = NULL;
rx_rings[i].xdp_buf = NULL;
+ rx_rings[i].xdp_rxq = (struct xdp_rxq_info){ };
/* this is to allow wr32 to have something to write to
* during early allocation of Rx buffers
@@ -3355,7 +3360,7 @@ rx_unwind:
}
kfree(rx_rings);
err = -ENOMEM;
- goto free_tx;
+ goto free_xdp;
}
}
@@ -3407,6 +3412,13 @@ process_link:
}
goto done;
+free_xdp:
+ if (xdp_rings) {
+ ice_for_each_xdp_txq(vsi, i)
+ ice_free_tx_ring(&xdp_rings[i]);
+ kfree(xdp_rings);
+ }
+
free_tx:
/* error cleanup if the Rx allocations failed after getting Tx */
if (tx_rings) {
@@ -4505,7 +4517,7 @@ ice_get_module_eeprom(struct net_device *netdev,
u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
struct ice_hw *hw = &pf->hw;
bool is_sfp = false;
- unsigned int i, j;
+ unsigned int i;
u16 offset = 0;
u8 page = 0;
int status;
@@ -4547,26 +4559,19 @@ ice_get_module_eeprom(struct net_device *netdev,
if (page == 0 || !(data[0x2] & 0x4)) {
u32 copy_len;
- /* If i2c bus is busy due to slow page change or
- * link management access, call can fail. This is normal.
- * So we retry this a few times.
- */
- for (j = 0; j < 4; j++) {
- status = ice_aq_sff_eeprom(hw, 0, addr, offset, page,
- !is_sfp, value,
- SFF_READ_BLOCK_SIZE,
- 0, NULL);
- netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n",
- addr, offset, page, is_sfp,
- value[0], value[1], value[2], value[3],
- value[4], value[5], value[6], value[7],
- status);
- if (status) {
- usleep_range(1500, 2500);
- memset(value, 0, SFF_READ_BLOCK_SIZE);
- continue;
- }
- break;
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page,
+ !is_sfp, value,
+ SFF_READ_BLOCK_SIZE,
+ 0, NULL);
+ netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%pe)\n",
+ addr, offset, page, is_sfp,
+ value[0], value[1], value[2], value[3],
+ value[4], value[5], value[6], value[7],
+ ERR_PTR(status));
+ if (status) {
+ netdev_err(netdev, "%s: error reading module EEPROM: status %pe\n",
+ __func__, ERR_PTR(status));
+ return status;
}
/* Make sure we have enough room for the new block */
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
index 3572b0e1d49f..102d63c3018b 100644
--- a/drivers/net/ethernet/intel/ice/ice_idc.c
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -361,6 +361,39 @@ void ice_unplug_aux_dev(struct ice_pf *pf)
}
/**
+ * ice_rdma_finalize_setup - Complete RDMA setup after VSI is ready
+ * @pf: ptr to ice_pf
+ *
+ * Sets VSI-dependent information and plugs aux device.
+ * Must be called after ice_init_rdma(), ice_vsi_rebuild(), and
+ * ice_dcb_rebuild() complete.
+ */
+void ice_rdma_finalize_setup(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct iidc_rdma_priv_dev_info *privd;
+ int ret;
+
+ if (!ice_is_rdma_ena(pf) || !pf->cdev_info)
+ return;
+
+ privd = pf->cdev_info->iidc_priv;
+ if (!privd || !pf->vsi || !pf->vsi[0] || !pf->vsi[0]->netdev)
+ return;
+
+ /* Assign VSI info now that VSI is valid */
+ privd->netdev = pf->vsi[0]->netdev;
+ privd->vport_id = pf->vsi[0]->vsi_num;
+
+ /* Update QoS info after DCB has been rebuilt */
+ ice_setup_dcb_qos_info(pf, &privd->qos_info);
+
+ ret = ice_plug_aux_dev(pf);
+ if (ret)
+ dev_warn(dev, "Failed to plug RDMA aux device: %d\n", ret);
+}
+
+/**
* ice_init_rdma - initializes PF for RDMA use
* @pf: ptr to ice_pf
*/
@@ -398,22 +431,14 @@ int ice_init_rdma(struct ice_pf *pf)
}
cdev->iidc_priv = privd;
- privd->netdev = pf->vsi[0]->netdev;
privd->hw_addr = (u8 __iomem *)pf->hw.hw_addr;
cdev->pdev = pf->pdev;
- privd->vport_id = pf->vsi[0]->vsi_num;
pf->cdev_info->rdma_protocol |= IIDC_RDMA_PROTOCOL_ROCEV2;
- ice_setup_dcb_qos_info(pf, &privd->qos_info);
- ret = ice_plug_aux_dev(pf);
- if (ret)
- goto err_plug_aux_dev;
+
return 0;
-err_plug_aux_dev:
- pf->cdev_info->adev = NULL;
- xa_erase(&ice_aux_id, pf->aux_idx);
err_alloc_xa:
kfree(privd);
err_privd_alloc:
@@ -432,7 +457,6 @@ void ice_deinit_rdma(struct ice_pf *pf)
if (!ice_is_rdma_ena(pf))
return;
- ice_unplug_aux_dev(pf);
xa_erase(&ice_aux_id, pf->aux_idx);
kfree(pf->cdev_info->iidc_priv);
kfree(pf->cdev_info);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index f91f8b672b02..689c6025ea82 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -107,10 +107,6 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
if (!vsi->rxq_map)
goto err_rxq_map;
- /* There is no need to allocate q_vectors for a loopback VSI. */
- if (vsi->type == ICE_VSI_LB)
- return 0;
-
/* allocate memory for q_vector pointers */
vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors,
sizeof(*vsi->q_vectors), GFP_KERNEL);
@@ -241,6 +237,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi)
case ICE_VSI_LB:
vsi->alloc_txq = 1;
vsi->alloc_rxq = 1;
+ /* A dummy q_vector, no actual IRQ. */
+ vsi->num_q_vectors = 1;
break;
default:
dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type);
@@ -2426,14 +2424,21 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi)
}
break;
case ICE_VSI_LB:
- ret = ice_vsi_alloc_rings(vsi);
+ ret = ice_vsi_alloc_q_vectors(vsi);
if (ret)
goto unroll_vsi_init;
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto unroll_alloc_q_vector;
+
ret = ice_vsi_alloc_ring_stats(vsi);
if (ret)
goto unroll_vector_base;
+ /* Simply map the dummy q_vector to the only rx_ring */
+ vsi->rx_rings[0]->q_vector = vsi->q_vectors[0];
+
break;
default:
/* clean up the resources and exit */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index ebf48feffb30..e7308e381e2f 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5138,6 +5138,9 @@ int ice_load(struct ice_pf *pf)
if (err)
goto err_init_rdma;
+ /* Finalize RDMA: VSI already created, assign info and plug device */
+ ice_rdma_finalize_setup(pf);
+
ice_service_task_restart(pf);
clear_bit(ICE_DOWN, pf->state);
@@ -5169,6 +5172,7 @@ void ice_unload(struct ice_pf *pf)
devl_assert_locked(priv_to_devlink(pf));
+ ice_unplug_aux_dev(pf);
ice_deinit_rdma(pf);
ice_deinit_features(pf);
ice_tc_indir_block_unregister(vsi);
@@ -5595,6 +5599,7 @@ static int ice_suspend(struct device *dev)
*/
disabled = ice_service_task_stop(pf);
+ ice_unplug_aux_dev(pf);
ice_deinit_rdma(pf);
/* Already suspended?, then there is nothing to do */
@@ -7859,7 +7864,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
ice_health_clear(pf);
- ice_plug_aux_dev(pf);
+ ice_rdma_finalize_setup(pf);
if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
ice_lag_rebuild(pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index a5bbce68f76c..a2cd4cf37734 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -560,7 +560,9 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
i = 0;
}
- if (rx_ring->vsi->type == ICE_VSI_PF &&
+ if ((rx_ring->vsi->type == ICE_VSI_PF ||
+ rx_ring->vsi->type == ICE_VSI_SF ||
+ rx_ring->vsi->type == ICE_VSI_LB) &&
xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) {
xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq);
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index c673094663a3..0643017541c3 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -899,6 +899,9 @@ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
u16 ntc = rx_ring->next_to_clean;
u16 ntu = rx_ring->next_to_use;
+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+
while (ntc != ntu) {
struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
index 40e08a71d2d3..bb99d9e7c65d 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -307,9 +307,6 @@ static int idpf_del_flow_steer(struct net_device *netdev,
vport_config = vport->adapter->vport_config[np->vport_idx];
user_config = &vport_config->user_config;
- if (!idpf_sideband_action_ena(vport, fsp))
- return -EOPNOTSUPP;
-
rule = kzalloc_flex(*rule, rule_info, 1);
if (!rule)
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index de3df705a7e6..cf966fe6c759 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1318,6 +1318,7 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
free_rss_key:
kfree(rss_data->rss_key);
+ rss_data->rss_key = NULL;
free_qreg_chunks:
idpf_vport_deinit_queue_reg_chunks(adapter->vport_config[idx]);
free_vector_idxs:
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 05a162094d10..252259993022 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -1314,6 +1314,9 @@ static void idpf_txq_group_rel(struct idpf_q_vec_rsrc *rsrc)
struct idpf_txq_group *txq_grp = &rsrc->txq_grps[i];
for (unsigned int j = 0; j < txq_grp->num_txq; j++) {
+ if (!txq_grp->txqs[j])
+ continue;
+
if (idpf_queue_has(FLOW_SCH_EN, txq_grp->txqs[j])) {
kfree(txq_grp->txqs[j]->refillq);
txq_grp->txqs[j]->refillq = NULL;
@@ -1339,6 +1342,9 @@ static void idpf_txq_group_rel(struct idpf_q_vec_rsrc *rsrc)
*/
static void idpf_rxq_sw_queue_rel(struct idpf_rxq_group *rx_qgrp)
{
+ if (!rx_qgrp->splitq.bufq_sets)
+ return;
+
for (unsigned int i = 0; i < rx_qgrp->splitq.num_bufq_sets; i++) {
struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[i];
@@ -2336,7 +2342,7 @@ void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq)
do {
struct idpf_splitq_4b_tx_compl_desc *tx_desc;
- struct idpf_tx_queue *target;
+ struct idpf_tx_queue *target = NULL;
u32 ctype_gen, id;
tx_desc = flow ? &complq->comp[ntc].common :
@@ -2356,14 +2362,14 @@ void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq)
target = complq->txq_grp->txqs[id];
idpf_queue_clear(SW_MARKER, target);
- if (target == txq)
- break;
next:
if (unlikely(++ntc == complq->desc_count)) {
ntc = 0;
gen_flag = !gen_flag;
}
+ if (target == txq)
+ break;
} while (time_before(jiffies, timeout));
idpf_queue_assign(GEN_CHK, complq, gen_flag);
@@ -4059,7 +4065,7 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport,
continue;
name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name,
- vec_name, vidx);
+ vec_name, vector);
err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0,
name, q_vector);
diff --git a/drivers/net/ethernet/intel/idpf/xdp.c b/drivers/net/ethernet/intel/idpf/xdp.c
index 6ac9c6624c2a..cbccd4546768 100644
--- a/drivers/net/ethernet/intel/idpf/xdp.c
+++ b/drivers/net/ethernet/intel/idpf/xdp.c
@@ -47,12 +47,16 @@ static int __idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq, void *arg)
{
const struct idpf_vport *vport = rxq->q_vector->vport;
const struct idpf_q_vec_rsrc *rsrc;
+ u32 frag_size = 0;
bool split;
int err;
+ if (idpf_queue_has(XSK, rxq))
+ frag_size = rxq->bufq_sets[0].bufq.truesize;
+
err = __xdp_rxq_info_reg(&rxq->xdp_rxq, vport->netdev, rxq->idx,
rxq->q_vector->napi.napi_id,
- rxq->rx_buf_size);
+ frag_size);
if (err)
return err;
diff --git a/drivers/net/ethernet/intel/idpf/xsk.c b/drivers/net/ethernet/intel/idpf/xsk.c
index 676cbd80774d..d95d3efdfd36 100644
--- a/drivers/net/ethernet/intel/idpf/xsk.c
+++ b/drivers/net/ethernet/intel/idpf/xsk.c
@@ -403,6 +403,7 @@ int idpf_xskfq_init(struct idpf_buf_queue *bufq)
bufq->pending = fq.pending;
bufq->thresh = fq.thresh;
bufq->rx_buf_size = fq.buf_len;
+ bufq->truesize = fq.truesize;
if (!idpf_xskfq_refill(bufq))
netdev_err(bufq->pool->netdev,
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
index 30ce5fbb5b77..ce4a7b58cad2 100644
--- a/drivers/net/ethernet/intel/igb/igb_xsk.c
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -524,6 +524,16 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
return nb_pkts < budget;
}
+static u32 igb_sw_irq_prep(struct igb_q_vector *q_vector)
+{
+ u32 eics = 0;
+
+ if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+ eics = q_vector->eims_value;
+
+ return eics;
+}
+
int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
struct igb_adapter *adapter = netdev_priv(dev);
@@ -542,20 +552,32 @@ int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
ring = adapter->tx_ring[qid];
- if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
- return -ENETDOWN;
-
if (!READ_ONCE(ring->xsk_pool))
return -EINVAL;
- if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+ if (flags & XDP_WAKEUP_TX) {
+ if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
+ return -ENETDOWN;
+
+ eics |= igb_sw_irq_prep(ring->q_vector);
+ }
+
+ if (flags & XDP_WAKEUP_RX) {
+ /* If IGB_FLAG_QUEUE_PAIRS is active, the q_vector
+ * and NAPI is shared between RX and TX.
+ * If NAPI is already running it would be marked as missed
+ * from the TX path, making this RX call a NOP
+ */
+ ring = adapter->rx_ring[qid];
+ eics |= igb_sw_irq_prep(ring->q_vector);
+ }
+
+ if (eics) {
/* Cause software interrupt */
- if (adapter->flags & IGB_FLAG_HAS_MSIX) {
- eics |= ring->q_vector->eims_value;
+ if (adapter->flags & IGB_FLAG_HAS_MSIX)
wr32(E1000_EICS, eics);
- } else {
+ else
wr32(E1000_ICS, E1000_ICS_RXDMT0);
- }
}
return 0;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 27e5c2109138..b2e8d0c0f827 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6906,28 +6906,29 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
return nxmit;
}
-static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
- struct igc_q_vector *q_vector)
+static u32 igc_sw_irq_prep(struct igc_q_vector *q_vector)
{
- struct igc_hw *hw = &adapter->hw;
u32 eics = 0;
- eics |= q_vector->eims_value;
- wr32(IGC_EICS, eics);
+ if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+ eics = q_vector->eims_value;
+
+ return eics;
}
int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
struct igc_adapter *adapter = netdev_priv(dev);
- struct igc_q_vector *q_vector;
+ struct igc_hw *hw = &adapter->hw;
struct igc_ring *ring;
+ u32 eics = 0;
if (test_bit(__IGC_DOWN, &adapter->state))
return -ENETDOWN;
if (!igc_xdp_is_enabled(adapter))
return -ENXIO;
-
+ /* Check if queue_id is valid. Tx and Rx queue numbers are always same */
if (queue_id >= adapter->num_rx_queues)
return -EINVAL;
@@ -6936,9 +6937,22 @@ int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
if (!ring->xsk_pool)
return -ENXIO;
- q_vector = adapter->q_vector[queue_id];
- if (!napi_if_scheduled_mark_missed(&q_vector->napi))
- igc_trigger_rxtxq_interrupt(adapter, q_vector);
+ if (flags & XDP_WAKEUP_RX)
+ eics |= igc_sw_irq_prep(ring->q_vector);
+
+ if (flags & XDP_WAKEUP_TX) {
+ /* If IGC_FLAG_QUEUE_PAIRS is active, the q_vector
+ * and NAPI is shared between RX and TX.
+ * If NAPI is already running it would be marked as missed
+ * from the RX path, making this TX call a NOP
+ */
+ ring = adapter->tx_ring[queue_id];
+ eics |= igc_sw_irq_prep(ring->q_vector);
+ }
+
+ if (eics)
+ /* Cause software interrupt */
+ wr32(IGC_EICS, eics);
return 0;
}
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 7aae83c108fd..44ee19386766 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -550,7 +550,8 @@ static void igc_ptp_free_tx_buffer(struct igc_adapter *adapter,
tstamp->buffer_type = 0;
/* Trigger txrx interrupt for transmit completion */
- igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, 0);
+ igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index,
+ XDP_WAKEUP_TX);
return;
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 74d320879513..b67b580f7f1c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -852,7 +852,8 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw,
if (!mac->get_link_status)
goto out;
- if (hw->mac.type == ixgbe_mac_e610_vf) {
+ if (hw->mac.type == ixgbe_mac_e610_vf &&
+ hw->api_version >= ixgbe_mbox_api_16) {
ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up);
if (ret_val)
goto out;
diff --git a/drivers/net/ethernet/intel/libeth/xsk.c b/drivers/net/ethernet/intel/libeth/xsk.c
index 846e902e31b6..4882951d5c9c 100644
--- a/drivers/net/ethernet/intel/libeth/xsk.c
+++ b/drivers/net/ethernet/intel/libeth/xsk.c
@@ -167,6 +167,7 @@ int libeth_xskfq_create(struct libeth_xskfq *fq)
fq->pending = fq->count;
fq->thresh = libeth_xdp_queue_threshold(fq->count);
fq->buf_len = xsk_pool_get_rx_frame_size(fq->pool);
+ fq->truesize = xsk_pool_get_rx_frag_step(fq->pool);
return 0;
}
diff --git a/drivers/net/ethernet/intel/libie/fwlog.c b/drivers/net/ethernet/intel/libie/fwlog.c
index 79020d990859..4d0c8370386b 100644
--- a/drivers/net/ethernet/intel/libie/fwlog.c
+++ b/drivers/net/ethernet/intel/libie/fwlog.c
@@ -1049,6 +1049,10 @@ void libie_fwlog_deinit(struct libie_fwlog *fwlog)
{
int status;
+ /* if FW logging isn't supported it means no configuration was done */
+ if (!libie_fwlog_supported(fwlog))
+ return;
+
/* make sure FW logging is disabled to not put the FW in a weird state
* for the next driver load
*/
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index ec55eb2a6c04..028dd55604ea 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -554,28 +554,43 @@ static void octep_clean_irqs(struct octep_device *oct)
}
/**
- * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ * octep_update_pkt() - Update IQ/OQ IN/OUT_CNT registers.
*
* @iq: Octeon Tx queue data structure.
* @oq: Octeon Rx queue data structure.
*/
-static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq)
+static void octep_update_pkt(struct octep_iq *iq, struct octep_oq *oq)
{
- u32 pkts_pend = oq->pkts_pending;
+ u32 pkts_pend = READ_ONCE(oq->pkts_pending);
+ u32 last_pkt_count = READ_ONCE(oq->last_pkt_count);
+ u32 pkts_processed = READ_ONCE(iq->pkts_processed);
+ u32 pkt_in_done = READ_ONCE(iq->pkt_in_done);
netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no);
- if (iq->pkts_processed) {
- writel(iq->pkts_processed, iq->inst_cnt_reg);
- iq->pkt_in_done -= iq->pkts_processed;
- iq->pkts_processed = 0;
+ if (pkts_processed) {
+ writel(pkts_processed, iq->inst_cnt_reg);
+ readl(iq->inst_cnt_reg);
+ WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed));
+ WRITE_ONCE(iq->pkts_processed, 0);
}
- if (oq->last_pkt_count - pkts_pend) {
- writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg);
- oq->last_pkt_count = pkts_pend;
+ if (last_pkt_count - pkts_pend) {
+ writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg);
+ readl(oq->pkts_sent_reg);
+ WRITE_ONCE(oq->last_pkt_count, pkts_pend);
}
/* Flush the previous wrties before writing to RESEND bit */
- wmb();
+ smp_wmb();
+}
+
+/**
+ * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ *
+ * @iq: Octeon Tx queue data structure.
+ * @oq: Octeon Rx queue data structure.
+ */
+static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq)
+{
writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg);
writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg);
}
@@ -601,7 +616,8 @@ static int octep_napi_poll(struct napi_struct *napi, int budget)
if (tx_pending || rx_done >= budget)
return budget;
- napi_complete(napi);
+ octep_update_pkt(ioq_vector->iq, ioq_vector->oq);
+ napi_complete_done(napi, rx_done);
octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq);
return rx_done;
}
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
index f2a7c6a76c74..74de19166488 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
@@ -324,10 +324,16 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct,
struct octep_oq *oq)
{
u32 pkt_count, new_pkts;
+ u32 last_pkt_count, pkts_pending;
pkt_count = readl(oq->pkts_sent_reg);
- new_pkts = pkt_count - oq->last_pkt_count;
+ last_pkt_count = READ_ONCE(oq->last_pkt_count);
+ new_pkts = pkt_count - last_pkt_count;
+ if (pkt_count < last_pkt_count) {
+ dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n",
+ oq->q_no, pkt_count, last_pkt_count);
+ }
/* Clear the hardware packets counter register if the rx queue is
* being processed continuously with-in a single interrupt and
* reached half its max value.
@@ -338,8 +344,9 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct,
pkt_count = readl(oq->pkts_sent_reg);
new_pkts += pkt_count;
}
- oq->last_pkt_count = pkt_count;
- oq->pkts_pending += new_pkts;
+ WRITE_ONCE(oq->last_pkt_count, pkt_count);
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts));
return new_pkts;
}
@@ -414,7 +421,7 @@ static int __octep_oq_process_rx(struct octep_device *oct,
u16 rx_ol_flags;
u32 read_idx;
- read_idx = oq->host_read_idx;
+ read_idx = READ_ONCE(oq->host_read_idx);
rx_bytes = 0;
desc_used = 0;
for (pkt = 0; pkt < pkts_to_process; pkt++) {
@@ -499,7 +506,7 @@ static int __octep_oq_process_rx(struct octep_device *oct,
napi_gro_receive(oq->napi, skb);
}
- oq->host_read_idx = read_idx;
+ WRITE_ONCE(oq->host_read_idx, read_idx);
oq->refill_count += desc_used;
oq->stats->packets += pkt;
oq->stats->bytes += rx_bytes;
@@ -522,22 +529,26 @@ int octep_oq_process_rx(struct octep_oq *oq, int budget)
{
u32 pkts_available, pkts_processed, total_pkts_processed;
struct octep_device *oct = oq->octep_dev;
+ u32 pkts_pending;
pkts_available = 0;
pkts_processed = 0;
total_pkts_processed = 0;
while (total_pkts_processed < budget) {
/* update pending count only when current one exhausted */
- if (oq->pkts_pending == 0)
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ if (pkts_pending == 0)
octep_oq_check_hw_for_pkts(oct, oq);
+ pkts_pending = READ_ONCE(oq->pkts_pending);
pkts_available = min(budget - total_pkts_processed,
- oq->pkts_pending);
+ pkts_pending);
if (!pkts_available)
break;
pkts_processed = __octep_oq_process_rx(oct, oq,
pkts_available);
- oq->pkts_pending -= pkts_processed;
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed));
total_pkts_processed += pkts_processed;
}
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
index 562fe945b422..7f1b93cffb98 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
@@ -286,28 +286,45 @@ static void octep_vf_clean_irqs(struct octep_vf_device *oct)
}
/**
- * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ * octep_vf_update_pkt() - Update IQ/OQ IN/OUT_CNT registers.
*
* @iq: Octeon Tx queue data structure.
* @oq: Octeon Rx queue data structure.
*/
-static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq)
+
+static void octep_vf_update_pkt(struct octep_vf_iq *iq, struct octep_vf_oq *oq)
{
- u32 pkts_pend = oq->pkts_pending;
+ u32 pkts_pend = READ_ONCE(oq->pkts_pending);
+ u32 last_pkt_count = READ_ONCE(oq->last_pkt_count);
+ u32 pkts_processed = READ_ONCE(iq->pkts_processed);
+ u32 pkt_in_done = READ_ONCE(iq->pkt_in_done);
netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no);
- if (iq->pkts_processed) {
- writel(iq->pkts_processed, iq->inst_cnt_reg);
- iq->pkt_in_done -= iq->pkts_processed;
- iq->pkts_processed = 0;
+ if (pkts_processed) {
+ writel(pkts_processed, iq->inst_cnt_reg);
+ readl(iq->inst_cnt_reg);
+ WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed));
+ WRITE_ONCE(iq->pkts_processed, 0);
}
- if (oq->last_pkt_count - pkts_pend) {
- writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg);
- oq->last_pkt_count = pkts_pend;
+ if (last_pkt_count - pkts_pend) {
+ writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg);
+ readl(oq->pkts_sent_reg);
+ WRITE_ONCE(oq->last_pkt_count, pkts_pend);
}
/* Flush the previous wrties before writing to RESEND bit */
smp_wmb();
+}
+
+/**
+ * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ *
+ * @iq: Octeon Tx queue data structure.
+ * @oq: Octeon Rx queue data structure.
+ */
+static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq,
+ struct octep_vf_oq *oq)
+{
writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg);
writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg);
}
@@ -333,6 +350,7 @@ static int octep_vf_napi_poll(struct napi_struct *napi, int budget)
if (tx_pending || rx_done >= budget)
return budget;
+ octep_vf_update_pkt(ioq_vector->iq, ioq_vector->oq);
if (likely(napi_complete_done(napi, rx_done)))
octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq);
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
index 6f865dbbba6c..b579d5b545c4 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
@@ -325,9 +325,16 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct,
struct octep_vf_oq *oq)
{
u32 pkt_count, new_pkts;
+ u32 last_pkt_count, pkts_pending;
pkt_count = readl(oq->pkts_sent_reg);
- new_pkts = pkt_count - oq->last_pkt_count;
+ last_pkt_count = READ_ONCE(oq->last_pkt_count);
+ new_pkts = pkt_count - last_pkt_count;
+
+ if (pkt_count < last_pkt_count) {
+ dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n",
+ oq->q_no, pkt_count, last_pkt_count);
+ }
/* Clear the hardware packets counter register if the rx queue is
* being processed continuously with-in a single interrupt and
@@ -339,8 +346,9 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct,
pkt_count = readl(oq->pkts_sent_reg);
new_pkts += pkt_count;
}
- oq->last_pkt_count = pkt_count;
- oq->pkts_pending += new_pkts;
+ WRITE_ONCE(oq->last_pkt_count, pkt_count);
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts));
return new_pkts;
}
@@ -369,7 +377,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct,
struct sk_buff *skb;
u32 read_idx;
- read_idx = oq->host_read_idx;
+ read_idx = READ_ONCE(oq->host_read_idx);
rx_bytes = 0;
desc_used = 0;
for (pkt = 0; pkt < pkts_to_process; pkt++) {
@@ -463,7 +471,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct,
napi_gro_receive(oq->napi, skb);
}
- oq->host_read_idx = read_idx;
+ WRITE_ONCE(oq->host_read_idx, read_idx);
oq->refill_count += desc_used;
oq->stats->packets += pkt;
oq->stats->bytes += rx_bytes;
@@ -486,22 +494,26 @@ int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget)
{
u32 pkts_available, pkts_processed, total_pkts_processed;
struct octep_vf_device *oct = oq->octep_vf_dev;
+ u32 pkts_pending;
pkts_available = 0;
pkts_processed = 0;
total_pkts_processed = 0;
while (total_pkts_processed < budget) {
/* update pending count only when current one exhausted */
- if (oq->pkts_pending == 0)
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ if (pkts_pending == 0)
octep_vf_oq_check_hw_for_pkts(oct, oq);
+ pkts_pending = READ_ONCE(oq->pkts_pending);
pkts_available = min(budget - total_pkts_processed,
- oq->pkts_pending);
+ pkts_pending);
if (!pkts_available)
break;
pkts_processed = __octep_vf_oq_process_rx(oct, oq,
pkts_available);
- oq->pkts_pending -= pkts_processed;
+ pkts_pending = READ_ONCE(oq->pkts_pending);
+ WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed));
total_pkts_processed += pkts_processed;
}
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 1060764b9f17..bb8ced22ca3b 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3770,12 +3770,21 @@ static int mtk_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
mtk_stop(dev);
old_prog = rcu_replace_pointer(eth->prog, prog, lockdep_rtnl_is_held());
+
+ if (netif_running(dev) && need_update) {
+ int err;
+
+ err = mtk_open(dev);
+ if (err) {
+ rcu_assign_pointer(eth->prog, old_prog);
+
+ return err;
+ }
+ }
+
if (old_prog)
bpf_prog_put(old_prog);
- if (netif_running(dev) && need_update)
- return mtk_open(dev);
-
return 0;
}
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a868c28c8280..ea71de39f996 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1770,8 +1770,14 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
ndev = txq->ndev;
apc = netdev_priv(ndev);
+ /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the
+ * doorbell can be rung in time for the hardware's requirement
+ * of at least one doorbell ring every 8 wraparounds.
+ */
comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
- CQE_POLLING_BUFFER);
+ min((cq->gdma_cq->queue_size /
+ COMP_ENTRY_SIZE) * 4,
+ CQE_POLLING_BUFFER));
if (comp_read < 1)
return;
@@ -2156,7 +2162,14 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
struct mana_rxq *rxq = cq->rxq;
int comp_read, i;
- comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
+ /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the
+ * doorbell can be rung in time for the hardware's requirement
+ * of at least one doorbell ring every 8 wraparounds.
+ */
+ comp_read = mana_gd_poll_cq(cq->gdma_cq, comp,
+ min((cq->gdma_cq->queue_size /
+ COMP_ENTRY_SIZE) * 4,
+ CQE_POLLING_BUFFER));
WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
rxq->xdp_flush = false;
@@ -2201,11 +2214,11 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
cq->work_done_since_doorbell = 0;
napi_complete_done(&cq->napi, w);
- } else if (cq->work_done_since_doorbell >
- cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) {
+ } else if (cq->work_done_since_doorbell >=
+ (cq->gdma_cq->queue_size / COMP_ENTRY_SIZE) * 4) {
/* MANA hardware requires at least one doorbell ring every 8
* wraparounds of CQ even if there is no need to arm the CQ.
- * This driver rings the doorbell as soon as we have exceeded
+ * This driver rings the doorbell as soon as it has processed
* 4 wraparounds.
*/
mana_gd_ring_cq(gdma_queue, 0);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index e98cb9f3a44c..1fe96cd24b4f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -328,6 +328,7 @@ struct stmmac_priv {
void __iomem *ptpaddr;
void __iomem *estaddr;
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ unsigned int num_double_vlans;
int sfty_irq;
int sfty_ce_irq;
int sfty_ue_irq;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 029e9b518160..f0160ff54a59 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -157,6 +157,7 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue);
static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue);
static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
u32 rxmode, u32 chan);
+static int stmmac_vlan_restore(struct stmmac_priv *priv);
#ifdef CONFIG_DEBUG_FS
static const struct net_device_ops stmmac_netdev_ops;
@@ -4097,6 +4098,8 @@ static int __stmmac_open(struct net_device *dev,
phylink_start(priv->phylink);
+ stmmac_vlan_restore(priv);
+
ret = stmmac_request_irq(dev);
if (ret)
goto irq_error;
@@ -6757,6 +6760,9 @@ static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double)
hash = 0;
}
+ if (!netif_running(priv->dev))
+ return 0;
+
return stmmac_update_vlan_hash(priv, priv->hw, hash, pmatch, is_double);
}
@@ -6766,6 +6772,7 @@ static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double)
static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid)
{
struct stmmac_priv *priv = netdev_priv(ndev);
+ unsigned int num_double_vlans;
bool is_double = false;
int ret;
@@ -6777,7 +6784,8 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
is_double = true;
set_bit(vid, priv->active_vlans);
- ret = stmmac_vlan_update(priv, is_double);
+ num_double_vlans = priv->num_double_vlans + is_double;
+ ret = stmmac_vlan_update(priv, num_double_vlans);
if (ret) {
clear_bit(vid, priv->active_vlans);
goto err_pm_put;
@@ -6785,9 +6793,15 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
if (priv->hw->num_vlan) {
ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
- if (ret)
+ if (ret) {
+ clear_bit(vid, priv->active_vlans);
+ stmmac_vlan_update(priv, priv->num_double_vlans);
goto err_pm_put;
+ }
}
+
+ priv->num_double_vlans = num_double_vlans;
+
err_pm_put:
pm_runtime_put(priv->device);
@@ -6800,6 +6814,7 @@ err_pm_put:
static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
{
struct stmmac_priv *priv = netdev_priv(ndev);
+ unsigned int num_double_vlans;
bool is_double = false;
int ret;
@@ -6811,14 +6826,23 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
is_double = true;
clear_bit(vid, priv->active_vlans);
+ num_double_vlans = priv->num_double_vlans - is_double;
+ ret = stmmac_vlan_update(priv, num_double_vlans);
+ if (ret) {
+ set_bit(vid, priv->active_vlans);
+ goto del_vlan_error;
+ }
if (priv->hw->num_vlan) {
ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
- if (ret)
+ if (ret) {
+ set_bit(vid, priv->active_vlans);
+ stmmac_vlan_update(priv, priv->num_double_vlans);
goto del_vlan_error;
+ }
}
- ret = stmmac_vlan_update(priv, is_double);
+ priv->num_double_vlans = num_double_vlans;
del_vlan_error:
pm_runtime_put(priv->device);
@@ -6826,6 +6850,23 @@ del_vlan_error:
return ret;
}
+static int stmmac_vlan_restore(struct stmmac_priv *priv)
+{
+ int ret;
+
+ if (!(priv->dev->features & NETIF_F_VLAN_FEATURES))
+ return 0;
+
+ if (priv->hw->num_vlan)
+ stmmac_restore_hw_vlan_rx_fltr(priv, priv->dev, priv->hw);
+
+ ret = stmmac_vlan_update(priv, priv->num_double_vlans);
+ if (ret)
+ netdev_err(priv->dev, "Failed to restore VLANs\n");
+
+ return ret;
+}
+
static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
struct stmmac_priv *priv = netdev_priv(dev);
@@ -8263,10 +8304,10 @@ int stmmac_resume(struct device *dev)
stmmac_init_coalesce(priv);
phylink_rx_clk_stop_block(priv->phylink);
stmmac_set_rx_mode(ndev);
-
- stmmac_restore_hw_vlan_rx_fltr(priv, ndev, priv->hw);
phylink_rx_clk_stop_unblock(priv->phylink);
+ stmmac_vlan_restore(priv);
+
stmmac_enable_all_queues(priv);
stmmac_enable_all_dma_irq(priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c
index b18404dd5a8b..e24efe3bfedb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c
@@ -76,7 +76,9 @@ static int vlan_add_hw_rx_fltr(struct net_device *dev,
}
hw->vlan_filter[0] = vid;
- vlan_write_single(dev, vid);
+
+ if (netif_running(dev))
+ vlan_write_single(dev, vid);
return 0;
}
@@ -97,12 +99,15 @@ static int vlan_add_hw_rx_fltr(struct net_device *dev,
return -EPERM;
}
- ret = vlan_write_filter(dev, hw, index, val);
+ if (netif_running(dev)) {
+ ret = vlan_write_filter(dev, hw, index, val);
+ if (ret)
+ return ret;
+ }
- if (!ret)
- hw->vlan_filter[index] = val;
+ hw->vlan_filter[index] = val;
- return ret;
+ return 0;
}
static int vlan_del_hw_rx_fltr(struct net_device *dev,
@@ -115,7 +120,9 @@ static int vlan_del_hw_rx_fltr(struct net_device *dev,
if (hw->num_vlan == 1) {
if ((hw->vlan_filter[0] & VLAN_TAG_VID) == vid) {
hw->vlan_filter[0] = 0;
- vlan_write_single(dev, 0);
+
+ if (netif_running(dev))
+ vlan_write_single(dev, 0);
}
return 0;
}
@@ -124,25 +131,23 @@ static int vlan_del_hw_rx_fltr(struct net_device *dev,
for (i = 0; i < hw->num_vlan; i++) {
if ((hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) &&
((hw->vlan_filter[i] & VLAN_TAG_DATA_VID) == vid)) {
- ret = vlan_write_filter(dev, hw, i, 0);
- if (!ret)
- hw->vlan_filter[i] = 0;
- else
- return ret;
+ if (netif_running(dev)) {
+ ret = vlan_write_filter(dev, hw, i, 0);
+ if (ret)
+ return ret;
+ }
+
+ hw->vlan_filter[i] = 0;
}
}
- return ret;
+ return 0;
}
static void vlan_restore_hw_rx_fltr(struct net_device *dev,
struct mac_device_info *hw)
{
- void __iomem *ioaddr = hw->pcsr;
- u32 value;
- u32 hash;
- u32 val;
int i;
/* Single Rx VLAN Filter */
@@ -152,19 +157,8 @@ static void vlan_restore_hw_rx_fltr(struct net_device *dev,
}
/* Extended Rx VLAN Filter Enable */
- for (i = 0; i < hw->num_vlan; i++) {
- if (hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) {
- val = hw->vlan_filter[i];
- vlan_write_filter(dev, hw, i, val);
- }
- }
-
- hash = readl(ioaddr + VLAN_HASH_TABLE);
- if (hash & VLAN_VLHT) {
- value = readl(ioaddr + VLAN_TAG);
- value |= VLAN_VTHM;
- writel(value, ioaddr + VLAN_TAG);
- }
+ for (i = 0; i < hw->num_vlan; i++)
+ vlan_write_filter(dev, hw, i, hw->vlan_filter[i]);
}
static void vlan_update_hash(struct mac_device_info *hw, u32 hash,
@@ -183,6 +177,10 @@ static void vlan_update_hash(struct mac_device_info *hw, u32 hash,
value |= VLAN_EDVLP;
value |= VLAN_ESVL;
value |= VLAN_DOVLTC;
+ } else {
+ value &= ~VLAN_EDVLP;
+ value &= ~VLAN_ESVL;
+ value &= ~VLAN_DOVLTC;
}
writel(value, ioaddr + VLAN_TAG);
@@ -193,6 +191,10 @@ static void vlan_update_hash(struct mac_device_info *hw, u32 hash,
value |= VLAN_EDVLP;
value |= VLAN_ESVL;
value |= VLAN_DOVLTC;
+ } else {
+ value &= ~VLAN_EDVLP;
+ value &= ~VLAN_ESVL;
+ value &= ~VLAN_DOVLTC;
}
writel(value | perfect_match, ioaddr + VLAN_TAG);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 5924db6be3fe..967918050433 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -391,7 +391,7 @@ static void am65_cpsw_nuss_ndo_slave_set_rx_mode(struct net_device *ndev)
cpsw_ale_set_allmulti(common->ale,
ndev->flags & IFF_ALLMULTI, port->port_id);
- port_mask = ALE_PORT_HOST;
+ port_mask = BIT(port->port_id) | ALE_PORT_HOST;
/* Clear all mcast from ALE */
cpsw_ale_flush_multicast(common->ale, port_mask, -1);
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index bb969dd435b4..be7b69319221 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -450,14 +450,13 @@ static void cpsw_ale_flush_mcast(struct cpsw_ale *ale, u32 *ale_entry,
ale->port_mask_bits);
if ((mask & port_mask) == 0)
return; /* ports dont intersect, not interested */
- mask &= ~port_mask;
+ mask &= (~port_mask | ALE_PORT_HOST);
- /* free if only remaining port is host port */
- if (mask)
+ if (mask == 0x0 || mask == ALE_PORT_HOST)
+ cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
+ else
cpsw_ale_set_port_mask(ale_entry, mask,
ale->port_mask_bits);
- else
- cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
}
int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid)
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 9efa39069c8c..372e55803168 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -279,6 +279,14 @@ static int prueth_emac_common_start(struct prueth *prueth)
if (ret)
goto disable_class;
+ /* Reset link state to force reconfiguration in
+ * emac_adjust_link(). Without this, if the link was already up
+ * before restart, emac_adjust_link() won't detect any state
+ * change and will skip critical configuration like writing
+ * speed to firmware.
+ */
+ emac->link = 0;
+
mutex_lock(&emac->ndev->phydev->lock);
emac_adjust_link(emac->ndev);
mutex_unlock(&emac->ndev->phydev->lock);
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 2db116fb1a7c..3c9acd6e49e8 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -617,7 +617,7 @@ static ssize_t sysdata_release_enabled_show(struct config_item *item,
bool release_enabled;
dynamic_netconsole_mutex_lock();
- release_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME);
+ release_enabled = !!(nt->sysdata_fields & SYSDATA_RELEASE);
dynamic_netconsole_mutex_unlock();
return sysfs_emit(buf, "%d\n", release_enabled);
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index ea7c1eadc864..3b6d4252d34c 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -10088,6 +10088,7 @@ static const struct usb_device_id rtl8152_table[] = {
{ USB_DEVICE(VENDOR_ID_DLINK, 0xb301) },
{ USB_DEVICE(VENDOR_ID_DELL, 0xb097) },
{ USB_DEVICE(VENDOR_ID_ASUS, 0x1976) },
+ { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02b) },
{}
};
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 05558b6afecd..17c941aac32d 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2130,6 +2130,11 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
{
struct ipv6hdr *pip6;
+ /* check if nd_tbl is not initiliazed due to
+ * ipv6.disable=1 set during boot
+ */
+ if (!ipv6_stub->nd_tbl)
+ return false;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
return false;
pip6 = ipv6_hdr(skb);
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index 05268d425d6a..af7590639dbf 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -5432,7 +5432,7 @@ int ath12k_mac_op_get_txpower(struct ieee80211_hw *hw,
ar->last_tx_power_update))
goto send_tx_power;
- params.pdev_id = ar->pdev->pdev_id;
+ params.pdev_id = ath12k_mac_get_target_pdev_id(ar);
params.vdev_id = arvif->vdev_id;
params.stats_id = WMI_REQUEST_PDEV_STAT;
ret = ath12k_mac_get_fw_stats(ar, &params);
@@ -13454,7 +13454,7 @@ void ath12k_mac_op_sta_statistics(struct ieee80211_hw *hw,
/* TODO: Use real NF instead of default one. */
signal = rate_info.rssi_comb;
- params.pdev_id = ar->pdev->pdev_id;
+ params.pdev_id = ath12k_mac_get_target_pdev_id(ar);
params.vdev_id = 0;
params.stats_id = WMI_REQUEST_VDEV_STAT;
@@ -13582,7 +13582,7 @@ void ath12k_mac_op_link_sta_statistics(struct ieee80211_hw *hw,
spin_unlock_bh(&ar->ab->dp->dp_lock);
if (!signal && ahsta->ahvif->vdev_type == WMI_VDEV_TYPE_STA) {
- params.pdev_id = ar->pdev->pdev_id;
+ params.pdev_id = ath12k_mac_get_target_pdev_id(ar);
params.vdev_id = 0;
params.stats_id = WMI_REQUEST_VDEV_STAT;
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c
index 8e13c3ec1cc7..b93e33edf369 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.c
+++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -8216,8 +8216,6 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab,
struct ath12k_fw_stats *stats = parse->stats;
struct ath12k *ar;
struct ath12k_link_vif *arvif;
- struct ieee80211_sta *sta;
- struct ath12k_sta *ahsta;
struct ath12k_link_sta *arsta;
int i, ret = 0;
const void *data = ptr;
@@ -8253,21 +8251,19 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab,
arvif = ath12k_mac_get_arvif(ar, le32_to_cpu(src->vdev_id));
if (arvif) {
- sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar),
- arvif->bssid,
- NULL);
- if (sta) {
- ahsta = ath12k_sta_to_ahsta(sta);
- arsta = &ahsta->deflink;
+ spin_lock_bh(&ab->base_lock);
+ arsta = ath12k_link_sta_find_by_addr(ab, arvif->bssid);
+ if (arsta) {
arsta->rssi_beacon = le32_to_cpu(src->beacon_snr);
ath12k_dbg(ab, ATH12K_DBG_WMI,
"wmi stats vdev id %d snr %d\n",
src->vdev_id, src->beacon_snr);
} else {
- ath12k_dbg(ab, ATH12K_DBG_WMI,
- "not found station bssid %pM for vdev stat\n",
- arvif->bssid);
+ ath12k_warn(ab,
+ "not found link sta with bssid %pM for vdev stat\n",
+ arvif->bssid);
}
+ spin_unlock_bh(&ab->base_lock);
}
data += sizeof(*src);
@@ -8338,8 +8334,6 @@ static int ath12k_wmi_tlv_rssi_chain_parse(struct ath12k_base *ab,
struct ath12k_fw_stats *stats = parse->stats;
struct ath12k_link_vif *arvif;
struct ath12k_link_sta *arsta;
- struct ieee80211_sta *sta;
- struct ath12k_sta *ahsta;
struct ath12k *ar;
int vdev_id;
int j;
@@ -8375,19 +8369,15 @@ static int ath12k_wmi_tlv_rssi_chain_parse(struct ath12k_base *ab,
"stats bssid %pM vif %p\n",
arvif->bssid, arvif->ahvif->vif);
- sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar),
- arvif->bssid,
- NULL);
- if (!sta) {
- ath12k_dbg(ab, ATH12K_DBG_WMI,
- "not found station of bssid %pM for rssi chain\n",
- arvif->bssid);
+ guard(spinlock_bh)(&ab->base_lock);
+ arsta = ath12k_link_sta_find_by_addr(ab, arvif->bssid);
+ if (!arsta) {
+ ath12k_warn(ab,
+ "not found link sta with bssid %pM for rssi chain\n",
+ arvif->bssid);
return -EPROTO;
}
- ahsta = ath12k_sta_to_ahsta(sta);
- arsta = &ahsta->deflink;
-
BUILD_BUG_ON(ARRAY_SIZE(arsta->chain_signal) >
ARRAY_SIZE(stats_rssi->rssi_avg_beacon));
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index 3304b5971be0..b41ca1410da9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -413,6 +413,7 @@ mt76_connac2_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi,
u32 val;
if (ieee80211_is_action(fc) &&
+ skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + 1 + 2 &&
mgmt->u.action.category == WLAN_CATEGORY_BACK &&
mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) {
u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
index 871b67101976..0d9435900423 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
@@ -668,6 +668,7 @@ mt7925_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi,
u32 val;
if (ieee80211_is_action(fc) &&
+ skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 &&
mgmt->u.action.category == WLAN_CATEGORY_BACK &&
mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ)
tid = MT_TX_ADDBA;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
index 2560e2f46e89..d4f3ee943b47 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
@@ -800,6 +800,7 @@ mt7996_mac_write_txwi_80211(struct mt7996_dev *dev, __le32 *txwi,
u32 val;
if (ieee80211_is_action(fc) &&
+ skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 &&
mgmt->u.action.category == WLAN_CATEGORY_BACK &&
mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) {
if (is_mt7990(&dev->mt76))
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index 8c8e074a3a70..c7ae8031436a 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -668,7 +668,7 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw,
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
struct ieee80211_conf *conf = &hw->conf;
- int status = -EOPNOTSUPP;
+ int status = 0;
mutex_lock(&common->mutex);
diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c
index 120f0379f81d..84eb15d729c7 100644
--- a/drivers/net/wireless/st/cw1200/pm.c
+++ b/drivers/net/wireless/st/cw1200/pm.c
@@ -264,12 +264,14 @@ int cw1200_wow_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
wiphy_err(priv->hw->wiphy,
"PM request failed: %d. WoW is disabled.\n", ret);
cw1200_wow_resume(hw);
+ mutex_unlock(&priv->conf_mutex);
return -EBUSY;
}
/* Force resume if event is coming from the device. */
if (atomic_read(&priv->bh_rx)) {
cw1200_wow_resume(hw);
+ mutex_unlock(&priv->conf_mutex);
return -EAGAIN;
}
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 17dd417756f2..1c340a4a0930 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -1875,6 +1875,8 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw)
wl->wow_enabled);
WARN_ON(!wl->wow_enabled);
+ mutex_lock(&wl->mutex);
+
ret = pm_runtime_force_resume(wl->dev);
if (ret < 0) {
wl1271_error("ELP wakeup failure!");
@@ -1891,8 +1893,6 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw)
run_irq_work = true;
spin_unlock_irqrestore(&wl->wl_lock, flags);
- mutex_lock(&wl->mutex);
-
/* test the recovery flag before calling any SDIO functions */
pending_recovery = test_bit(WL1271_FLAG_RECOVERY_IN_PROGRESS,
&wl->flags);
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 295076cf70de..c57ae4d6c5c0 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -905,6 +905,19 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
* supported, so we avoid reprogramming the region on every MSI,
* specifically unmapping immediately after writel().
*/
+ if (ep->msi_iatu_mapped && (ep->msi_msg_addr != msg_addr ||
+ ep->msi_map_size != map_size)) {
+ /*
+ * The host changed the MSI target address or the required
+ * mapping size changed. Reprogramming the iATU when there are
+ * operations in flight is unsafe on this controller. However,
+ * there is no unified way to check if we have operations in
+ * flight, thus we don't know if we should WARN() or not.
+ */
+ dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);
+ ep->msi_iatu_mapped = false;
+ }
+
if (!ep->msi_iatu_mapped) {
ret = dw_pcie_ep_map_addr(epc, func_no, 0,
ep->msi_mem_phys, msg_addr,
@@ -915,15 +928,6 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
ep->msi_iatu_mapped = true;
ep->msi_msg_addr = msg_addr;
ep->msi_map_size = map_size;
- } else if (WARN_ON_ONCE(ep->msi_msg_addr != msg_addr ||
- ep->msi_map_size != map_size)) {
- /*
- * The host changed the MSI target address or the required
- * mapping size changed. Reprogramming the iATU at runtime is
- * unsafe on this controller, so bail out instead of trying to
- * update the existing region.
- */
- return -EINVAL;
}
writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset);
@@ -1010,6 +1014,9 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
writel(msg_data, ep->msi_mem + offset);
+ /* flush posted write before unmap */
+ readl(ep->msi_mem + offset);
+
dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);
return 0;
diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
index a8f82104a384..227c37c360e1 100644
--- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
+++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
@@ -574,10 +574,9 @@ static int cs42l43_pin_probe(struct platform_device *pdev)
if (child) {
ret = devm_add_action_or_reset(&pdev->dev,
cs42l43_fwnode_put, child);
- if (ret) {
- fwnode_handle_put(child);
+ if (ret)
return ret;
- }
+
if (!child->dev)
child->dev = priv->dev;
fwnode = child;
diff --git a/drivers/pinctrl/cix/pinctrl-sky1.c b/drivers/pinctrl/cix/pinctrl-sky1.c
index 5d0d8be815b2..938894058d86 100644
--- a/drivers/pinctrl/cix/pinctrl-sky1.c
+++ b/drivers/pinctrl/cix/pinctrl-sky1.c
@@ -522,11 +522,10 @@ static int __maybe_unused sky1_pinctrl_resume(struct device *dev)
return pinctrl_force_default(spctl->pctl);
}
-const struct dev_pm_ops sky1_pinctrl_pm_ops = {
+static const struct dev_pm_ops sky1_pinctrl_pm_ops = {
SET_LATE_SYSTEM_SLEEP_PM_OPS(sky1_pinctrl_suspend,
sky1_pinctrl_resume)
};
-EXPORT_SYMBOL_GPL(sky1_pinctrl_pm_ops);
static int sky1_pinctrl_probe(struct platform_device *pdev)
{
diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c
index dfa32b11555c..e2293a872dcb 100644
--- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c
+++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c
@@ -679,7 +679,6 @@ static int aml_dt_node_to_map_pinmux(struct pinctrl_dev *pctldev,
unsigned int *num_maps)
{
struct device *dev = pctldev->dev;
- struct device_node *pnode;
unsigned long *configs = NULL;
unsigned int num_configs = 0;
struct property *prop;
@@ -693,7 +692,7 @@ static int aml_dt_node_to_map_pinmux(struct pinctrl_dev *pctldev,
return -ENOENT;
}
- pnode = of_get_parent(np);
+ struct device_node *pnode __free(device_node) = of_get_parent(np);
if (!pnode) {
dev_info(dev, "Missing function node\n");
return -EINVAL;
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 94b1d057197c..2b030bd0e6ad 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -351,13 +351,13 @@ int pinconf_generic_parse_dt_config(struct device_node *np,
ret = parse_dt_cfg(np, dt_params, ARRAY_SIZE(dt_params), cfg, &ncfg);
if (ret)
- return ret;
+ goto out;
if (pctldev && pctldev->desc->num_custom_params &&
pctldev->desc->custom_params) {
ret = parse_dt_cfg(np, pctldev->desc->custom_params,
pctldev->desc->num_custom_params, cfg, &ncfg);
if (ret)
- return ret;
+ goto out;
}
/* no configs found at all */
diff --git a/drivers/pinctrl/pinctrl-amdisp.c b/drivers/pinctrl/pinctrl-amdisp.c
index efbf40c776ea..e0874cc086a7 100644
--- a/drivers/pinctrl/pinctrl-amdisp.c
+++ b/drivers/pinctrl/pinctrl-amdisp.c
@@ -80,7 +80,7 @@ static int amdisp_get_group_pins(struct pinctrl_dev *pctldev,
return 0;
}
-const struct pinctrl_ops amdisp_pinctrl_ops = {
+static const struct pinctrl_ops amdisp_pinctrl_ops = {
.get_groups_count = amdisp_get_groups_count,
.get_group_name = amdisp_get_group_name,
.get_group_pins = amdisp_get_group_pins,
diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c
index a4b04bf6d081..5c055d344ac9 100644
--- a/drivers/pinctrl/pinctrl-cy8c95x0.c
+++ b/drivers/pinctrl/pinctrl-cy8c95x0.c
@@ -627,7 +627,7 @@ static int cy8c95x0_write_regs_mask(struct cy8c95x0_pinctrl *chip, int reg,
bitmap_scatter(tmask, mask, chip->map, MAX_LINE);
bitmap_scatter(tval, val, chip->map, MAX_LINE);
- for_each_set_clump8(offset, bits, tmask, chip->tpin) {
+ for_each_set_clump8(offset, bits, tmask, chip->nport * BANK_SZ) {
unsigned int i = offset / 8;
write_val = bitmap_get_value8(tval, offset);
@@ -655,7 +655,7 @@ static int cy8c95x0_read_regs_mask(struct cy8c95x0_pinctrl *chip, int reg,
bitmap_scatter(tmask, mask, chip->map, MAX_LINE);
bitmap_scatter(tval, val, chip->map, MAX_LINE);
- for_each_set_clump8(offset, bits, tmask, chip->tpin) {
+ for_each_set_clump8(offset, bits, tmask, chip->nport * BANK_SZ) {
unsigned int i = offset / 8;
ret = cy8c95x0_regmap_read_bits(chip, reg, i, bits, &read_val);
diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c
index 48b55c5bf8d4..ba1c867b7b89 100644
--- a/drivers/pinctrl/pinctrl-equilibrium.c
+++ b/drivers/pinctrl/pinctrl-equilibrium.c
@@ -23,7 +23,7 @@
#define PIN_NAME_LEN 10
#define PAD_REG_OFF 0x100
-static void eqbr_gpio_disable_irq(struct irq_data *d)
+static void eqbr_irq_mask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
@@ -36,7 +36,7 @@ static void eqbr_gpio_disable_irq(struct irq_data *d)
gpiochip_disable_irq(gc, offset);
}
-static void eqbr_gpio_enable_irq(struct irq_data *d)
+static void eqbr_irq_unmask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
@@ -50,7 +50,7 @@ static void eqbr_gpio_enable_irq(struct irq_data *d)
raw_spin_unlock_irqrestore(&gctrl->lock, flags);
}
-static void eqbr_gpio_ack_irq(struct irq_data *d)
+static void eqbr_irq_ack(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
@@ -62,10 +62,17 @@ static void eqbr_gpio_ack_irq(struct irq_data *d)
raw_spin_unlock_irqrestore(&gctrl->lock, flags);
}
-static void eqbr_gpio_mask_ack_irq(struct irq_data *d)
+static void eqbr_irq_mask_ack(struct irq_data *d)
{
- eqbr_gpio_disable_irq(d);
- eqbr_gpio_ack_irq(d);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
+ unsigned int offset = irqd_to_hwirq(d);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&gctrl->lock, flags);
+ writel(BIT(offset), gctrl->membase + GPIO_IRNENCLR);
+ writel(BIT(offset), gctrl->membase + GPIO_IRNCR);
+ raw_spin_unlock_irqrestore(&gctrl->lock, flags);
}
static inline void eqbr_cfg_bit(void __iomem *addr,
@@ -92,7 +99,7 @@ static int eqbr_irq_type_cfg(struct gpio_irq_type *type,
return 0;
}
-static int eqbr_gpio_set_irq_type(struct irq_data *d, unsigned int type)
+static int eqbr_irq_set_type(struct irq_data *d, unsigned int type)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc);
@@ -166,11 +173,11 @@ static void eqbr_irq_handler(struct irq_desc *desc)
static const struct irq_chip eqbr_irq_chip = {
.name = "gpio_irq",
- .irq_mask = eqbr_gpio_disable_irq,
- .irq_unmask = eqbr_gpio_enable_irq,
- .irq_ack = eqbr_gpio_ack_irq,
- .irq_mask_ack = eqbr_gpio_mask_ack_irq,
- .irq_set_type = eqbr_gpio_set_irq_type,
+ .irq_ack = eqbr_irq_ack,
+ .irq_mask = eqbr_irq_mask,
+ .irq_mask_ack = eqbr_irq_mask_ack,
+ .irq_unmask = eqbr_irq_unmask,
+ .irq_set_type = eqbr_irq_set_type,
.flags = IRQCHIP_IMMUTABLE,
GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index d87c0b1de616..f15b18f334ee 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -3640,14 +3640,10 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
* or the gpio driver hasn't probed yet.
*/
scoped_guard(mutex, &bank->deferred_lock) {
- if (!gpio || !gpio->direction_output) {
- rc = rockchip_pinconf_defer_pin(bank,
- pin - bank->pin_base,
- param, arg);
- if (rc)
- return rc;
- break;
- }
+ if (!gpio || !gpio->direction_output)
+ return rockchip_pinconf_defer_pin(bank,
+ pin - bank->pin_base,
+ param, arg);
}
}
diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c
index 4dfa820d4e77..f1c827ddbfbf 100644
--- a/drivers/pinctrl/qcom/pinctrl-qcs615.c
+++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c
@@ -1067,6 +1067,7 @@ static const struct msm_pinctrl_soc_data qcs615_tlmm = {
.ntiles = ARRAY_SIZE(qcs615_tiles),
.wakeirq_map = qcs615_pdc_map,
.nwakeirq_map = ARRAY_SIZE(qcs615_pdc_map),
+ .wakeirq_dual_edge_errata = true,
};
static const struct of_device_id qcs615_tlmm_of_match[] = {
diff --git a/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c
index d93af5f0e8d3..65411abfbfac 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c
@@ -76,7 +76,7 @@ static const char * const pdm_clk_groups[] = { "gpio18" };
static const char * const pdm_rx_groups[] = { "gpio21", "gpio23", "gpio25" };
static const char * const pdm_sync_groups[] = { "gpio19" };
-const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = {
+static const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = {
LPI_PINGROUP_OFFSET(0, LPI_NO_SLEW, _, _, _, _, 0x0000),
LPI_PINGROUP_OFFSET(1, LPI_NO_SLEW, _, _, _, _, 0x1000),
LPI_PINGROUP_OFFSET(2, LPI_NO_SLEW, _, _, _, _, 0x2000),
@@ -113,7 +113,7 @@ const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = {
LPI_PINGROUP_OFFSET(31, LPI_NO_SLEW, _, _, _, _, 0xb010),
};
-const struct lpi_function sdm660_lpi_pinctrl_functions[] = {
+static const struct lpi_function sdm660_lpi_pinctrl_functions[] = {
LPI_FUNCTION(comp_rx),
LPI_FUNCTION(dmic1_clk),
LPI_FUNCTION(dmic1_data),
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index 48434292a39b..c990b6118172 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -204,6 +204,32 @@ sunxi_pinctrl_desc_find_function_by_pin(struct sunxi_pinctrl *pctl,
return NULL;
}
+static struct sunxi_desc_function *
+sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl,
+ const u16 pin_num,
+ const u8 muxval)
+{
+ for (unsigned int i = 0; i < pctl->desc->npins; i++) {
+ const struct sunxi_desc_pin *pin = pctl->desc->pins + i;
+ struct sunxi_desc_function *func = pin->functions;
+
+ if (pin->pin.number != pin_num)
+ continue;
+
+ if (pin->variant && !(pctl->variant & pin->variant))
+ continue;
+
+ while (func->name) {
+ if (func->muxval == muxval)
+ return func;
+
+ func++;
+ }
+ }
+
+ return NULL;
+}
+
static int sunxi_pctrl_get_groups_count(struct pinctrl_dev *pctldev)
{
struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
@@ -930,6 +956,30 @@ static const struct pinmux_ops sunxi_pmx_ops = {
.strict = true,
};
+static int sunxi_pinctrl_gpio_get_direction(struct gpio_chip *chip,
+ unsigned int offset)
+{
+ struct sunxi_pinctrl *pctl = gpiochip_get_data(chip);
+ const struct sunxi_desc_function *func;
+ u32 pin = offset + chip->base;
+ u32 reg, shift, mask;
+ u8 muxval;
+
+ sunxi_mux_reg(pctl, offset, &reg, &shift, &mask);
+
+ muxval = (readl(pctl->membase + reg) & mask) >> shift;
+
+ func = sunxi_pinctrl_desc_find_function_by_pin_and_mux(pctl, pin, muxval);
+ if (!func)
+ return -ENODEV;
+
+ if (!strcmp(func->name, "gpio_out"))
+ return GPIO_LINE_DIRECTION_OUT;
+ if (!strcmp(func->name, "gpio_in") || !strcmp(func->name, "irq"))
+ return GPIO_LINE_DIRECTION_IN;
+ return -EINVAL;
+}
+
static int sunxi_pinctrl_gpio_direction_input(struct gpio_chip *chip,
unsigned offset)
{
@@ -1599,6 +1649,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev,
pctl->chip->request = gpiochip_generic_request;
pctl->chip->free = gpiochip_generic_free;
pctl->chip->set_config = gpiochip_generic_config;
+ pctl->chip->get_direction = sunxi_pinctrl_gpio_get_direction;
pctl->chip->direction_input = sunxi_pinctrl_gpio_direction_input;
pctl->chip->direction_output = sunxi_pinctrl_gpio_direction_output;
pctl->chip->get = sunxi_pinctrl_gpio_get;
diff --git a/drivers/pmdomain/imx/gpcv2.c b/drivers/pmdomain/imx/gpcv2.c
index cff738e4d546..a829f8da5be7 100644
--- a/drivers/pmdomain/imx/gpcv2.c
+++ b/drivers/pmdomain/imx/gpcv2.c
@@ -1416,7 +1416,9 @@ static int imx_pgc_domain_suspend(struct device *dev)
static int imx_pgc_domain_resume(struct device *dev)
{
- return pm_runtime_put(dev);
+ pm_runtime_put(dev);
+
+ return 0;
}
#endif
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index a708fc63f581..d10b6f9243d5 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -508,7 +508,7 @@ config REGULATOR_FP9931
This driver supports the FP9931/JD9930 voltage regulator chip
which is used to provide power to Electronic Paper Displays
so it is found in E-Book readers.
- If HWWON is enabled, it also provides temperature measurement.
+ If HWMON is enabled, it also provides temperature measurement.
config REGULATOR_LM363X
tristate "TI LM363X voltage regulators"
diff --git a/drivers/regulator/bq257xx-regulator.c b/drivers/regulator/bq257xx-regulator.c
index fc1ccede4468..dab8f1ab4450 100644
--- a/drivers/regulator/bq257xx-regulator.c
+++ b/drivers/regulator/bq257xx-regulator.c
@@ -115,11 +115,10 @@ static void bq257xx_reg_dt_parse_gpio(struct platform_device *pdev)
return;
subchild = of_get_child_by_name(child, pdata->desc.of_match);
+ of_node_put(child);
if (!subchild)
return;
- of_node_put(child);
-
pdata->otg_en_gpio = devm_fwnode_gpiod_get_index(&pdev->dev,
of_fwnode_handle(subchild),
"enable", 0,
diff --git a/drivers/regulator/fp9931.c b/drivers/regulator/fp9931.c
index 7fbcc6327cc6..abea3b69d8a0 100644
--- a/drivers/regulator/fp9931.c
+++ b/drivers/regulator/fp9931.c
@@ -144,13 +144,12 @@ static int fp9931_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
return ret;
ret = regmap_read(data->regmap, FP9931_REG_TMST_VALUE, &val);
- if (ret)
- return ret;
+ if (!ret)
+ *temp = (s8)val * 1000;
pm_runtime_put_autosuspend(data->dev);
- *temp = (s8)val * 1000;
- return 0;
+ return ret;
}
static umode_t fp9931_hwmon_is_visible(const void *data,
diff --git a/drivers/regulator/tps65185.c b/drivers/regulator/tps65185.c
index 3286c9ab33d0..786622d8d598 100644
--- a/drivers/regulator/tps65185.c
+++ b/drivers/regulator/tps65185.c
@@ -332,6 +332,9 @@ static int tps65185_probe(struct i2c_client *client)
int i;
data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
data->regmap = devm_regmap_init_i2c(client, &regmap_config);
if (IS_ERR(data->regmap))
return dev_err_probe(&client->dev, PTR_ERR(data->regmap),
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 94ad253d65a0..e9d9ac7da485 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -12025,6 +12025,8 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba)
iounmap(phba->sli4_hba.conf_regs_memmap_p);
if (phba->sli4_hba.dpp_regs_memmap_p)
iounmap(phba->sli4_hba.dpp_regs_memmap_p);
+ if (phba->sli4_hba.dpp_regs_memmap_wc_p)
+ iounmap(phba->sli4_hba.dpp_regs_memmap_wc_p);
break;
case LPFC_SLI_INTF_IF_TYPE_1:
break;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 1cbfbe44cb7c..303523f754b8 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -15977,6 +15977,32 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
return NULL;
}
+static __maybe_unused void __iomem *
+lpfc_dpp_wc_map(struct lpfc_hba *phba, uint8_t dpp_barset)
+{
+
+ /* DPP region is supposed to cover 64-bit BAR2 */
+ if (dpp_barset != WQ_PCI_BAR_4_AND_5) {
+ lpfc_log_msg(phba, KERN_WARNING, LOG_INIT,
+ "3273 dpp_barset x%x != WQ_PCI_BAR_4_AND_5\n",
+ dpp_barset);
+ return NULL;
+ }
+
+ if (!phba->sli4_hba.dpp_regs_memmap_wc_p) {
+ void __iomem *dpp_map;
+
+ dpp_map = ioremap_wc(phba->pci_bar2_map,
+ pci_resource_len(phba->pcidev,
+ PCI_64BIT_BAR4));
+
+ if (dpp_map)
+ phba->sli4_hba.dpp_regs_memmap_wc_p = dpp_map;
+ }
+
+ return phba->sli4_hba.dpp_regs_memmap_wc_p;
+}
+
/**
* lpfc_modify_hba_eq_delay - Modify Delay Multiplier on EQs
* @phba: HBA structure that EQs are on.
@@ -16940,9 +16966,6 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
uint8_t dpp_barset;
uint32_t dpp_offset;
uint8_t wq_create_version;
-#ifdef CONFIG_X86
- unsigned long pg_addr;
-#endif
/* sanity check on queue memory */
if (!wq || !cq)
@@ -17128,14 +17151,15 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
#ifdef CONFIG_X86
/* Enable combined writes for DPP aperture */
- pg_addr = (unsigned long)(wq->dpp_regaddr) & PAGE_MASK;
- rc = set_memory_wc(pg_addr, 1);
- if (rc) {
+ bar_memmap_p = lpfc_dpp_wc_map(phba, dpp_barset);
+ if (!bar_memmap_p) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3272 Cannot setup Combined "
"Write on WQ[%d] - disable DPP\n",
wq->queue_id);
phba->cfg_enable_dpp = 0;
+ } else {
+ wq->dpp_regaddr = bar_memmap_p + dpp_offset;
}
#else
phba->cfg_enable_dpp = 0;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index ee58383492b2..b6d90604bb61 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -785,6 +785,9 @@ struct lpfc_sli4_hba {
void __iomem *dpp_regs_memmap_p; /* Kernel memory mapped address for
* dpp registers
*/
+ void __iomem *dpp_regs_memmap_wc_p;/* Kernel memory mapped address for
+ * dpp registers with write combining
+ */
union {
struct {
/* IF Type 0, BAR 0 PCI cfg space reg mem map */
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 81150bef1145..5875065e2849 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -4807,21 +4807,25 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc)
}
for (i = 0; i < mrioc->num_queues; i++) {
- mrioc->op_reply_qinfo[i].qid = 0;
- mrioc->op_reply_qinfo[i].ci = 0;
- mrioc->op_reply_qinfo[i].num_replies = 0;
- mrioc->op_reply_qinfo[i].ephase = 0;
- atomic_set(&mrioc->op_reply_qinfo[i].pend_ios, 0);
- atomic_set(&mrioc->op_reply_qinfo[i].in_use, 0);
- mpi3mr_memset_op_reply_q_buffers(mrioc, i);
-
- mrioc->req_qinfo[i].ci = 0;
- mrioc->req_qinfo[i].pi = 0;
- mrioc->req_qinfo[i].num_requests = 0;
- mrioc->req_qinfo[i].qid = 0;
- mrioc->req_qinfo[i].reply_qid = 0;
- spin_lock_init(&mrioc->req_qinfo[i].q_lock);
- mpi3mr_memset_op_req_q_buffers(mrioc, i);
+ if (mrioc->op_reply_qinfo) {
+ mrioc->op_reply_qinfo[i].qid = 0;
+ mrioc->op_reply_qinfo[i].ci = 0;
+ mrioc->op_reply_qinfo[i].num_replies = 0;
+ mrioc->op_reply_qinfo[i].ephase = 0;
+ atomic_set(&mrioc->op_reply_qinfo[i].pend_ios, 0);
+ atomic_set(&mrioc->op_reply_qinfo[i].in_use, 0);
+ mpi3mr_memset_op_reply_q_buffers(mrioc, i);
+ }
+
+ if (mrioc->req_qinfo) {
+ mrioc->req_qinfo[i].ci = 0;
+ mrioc->req_qinfo[i].pi = 0;
+ mrioc->req_qinfo[i].num_requests = 0;
+ mrioc->req_qinfo[i].qid = 0;
+ mrioc->req_qinfo[i].reply_qid = 0;
+ spin_lock_init(&mrioc->req_qinfo[i].q_lock);
+ mpi3mr_memset_op_req_q_buffers(mrioc, i);
+ }
}
atomic_set(&mrioc->pend_large_data_sz, 0);
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 6a8d35aea93a..645524f3fe2d 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -525,8 +525,9 @@ int pm8001_queue_command(struct sas_task *task, gfp_t gfp_flags)
} else {
task->task_done(task);
}
- rc = -ENODEV;
- goto err_out;
+ spin_unlock_irqrestore(&pm8001_ha->lock, flags);
+ pm8001_dbg(pm8001_ha, IO, "pm8001_task_exec device gone\n");
+ return 0;
}
ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_dev, task);
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index 35101e9b7ba7..8e1686358e25 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -528,9 +528,8 @@ struct efd {
};
static int ses_enclosure_find_by_addr(struct enclosure_device *edev,
- void *data)
+ struct efd *efd)
{
- struct efd *efd = data;
int i;
struct ses_component *scomp;
@@ -683,7 +682,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev,
if (efd.addr) {
efd.dev = &sdev->sdev_gendev;
- enclosure_for_each_device(ses_enclosure_find_by_addr, &efd);
+ ses_enclosure_find_by_addr(edev, &efd);
}
}
diff --git a/drivers/scsi/snic/vnic_dev.c b/drivers/scsi/snic/vnic_dev.c
index c692de061f29..ed7771e62854 100644
--- a/drivers/scsi/snic/vnic_dev.c
+++ b/drivers/scsi/snic/vnic_dev.c
@@ -42,8 +42,6 @@ struct vnic_dev {
struct vnic_devcmd_notify *notify;
struct vnic_devcmd_notify notify_copy;
dma_addr_t notify_pa;
- u32 *linkstatus;
- dma_addr_t linkstatus_pa;
struct vnic_stats *stats;
dma_addr_t stats_pa;
struct vnic_devcmd_fw_info *fw_info;
@@ -650,8 +648,6 @@ int svnic_dev_init(struct vnic_dev *vdev, int arg)
int svnic_dev_link_status(struct vnic_dev *vdev)
{
- if (vdev->linkstatus)
- return *vdev->linkstatus;
if (!vnic_dev_notify_ready(vdev))
return 0;
@@ -686,11 +682,6 @@ void svnic_dev_unregister(struct vnic_dev *vdev)
sizeof(struct vnic_devcmd_notify),
vdev->notify,
vdev->notify_pa);
- if (vdev->linkstatus)
- dma_free_coherent(&vdev->pdev->dev,
- sizeof(u32),
- vdev->linkstatus,
- vdev->linkstatus_pa);
if (vdev->stats)
dma_free_coherent(&vdev->pdev->dev,
sizeof(struct vnic_stats),
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 664ad55728b5..ae1abab97835 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1856,8 +1856,9 @@ static enum scsi_qc_status storvsc_queuecommand(struct Scsi_Host *host,
cmd_request->payload_sz = payload_sz;
/* Invokes the vsc to start an IO */
- ret = storvsc_do_io(dev, cmd_request, get_cpu());
- put_cpu();
+ migrate_disable();
+ ret = storvsc_do_io(dev, cmd_request, smp_processor_id());
+ migrate_enable();
if (ret)
scsi_dma_unmap(scmnd);
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index b99de8c4cc99..33f211e159ef 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1625,6 +1625,9 @@ static int stm32_spi_prepare_rx_dma_mdma_chaining(struct stm32_spi *spi,
return -EINVAL;
}
+ *rx_mdma_desc = _mdma_desc;
+ *rx_dma_desc = _dma_desc;
+
return 0;
}
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 847b55789bb8..899e663fea6e 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -24,6 +24,7 @@
#include <linux/pm_opp.h>
#include <linux/regulator/consumer.h>
#include <linux/sched/clock.h>
+#include <linux/sizes.h>
#include <linux/iopoll.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
@@ -517,8 +518,8 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, struct scsi_cmnd *cmd,
if (hba->mcq_enabled) {
struct ufs_hw_queue *hwq = ufshcd_mcq_req_to_hwq(hba, rq);
-
- hwq_id = hwq->id;
+ if (hwq)
+ hwq_id = hwq->id;
} else {
doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
}
@@ -4389,14 +4390,6 @@ out_unlock:
spin_unlock_irqrestore(hba->host->host_lock, flags);
mutex_unlock(&hba->uic_cmd_mutex);
- /*
- * If the h8 exit fails during the runtime resume process, it becomes
- * stuck and cannot be recovered through the error handler. To fix
- * this, use link recovery instead of the error handler.
- */
- if (ret && hba->pm_op_in_progress)
- ret = ufshcd_link_recovery(hba);
-
return ret;
}
@@ -5249,6 +5242,25 @@ static void ufshcd_lu_init(struct ufs_hba *hba, struct scsi_device *sdev)
hba->dev_info.rpmb_region_size[1] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION1_SIZE];
hba->dev_info.rpmb_region_size[2] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION2_SIZE];
hba->dev_info.rpmb_region_size[3] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION3_SIZE];
+
+ if (hba->dev_info.wspecversion <= 0x0220) {
+ /*
+ * These older spec chips have only one RPMB region,
+ * sized between 128 kB minimum and 16 MB maximum.
+ * No per region size fields are provided (respective
+ * REGIONX_SIZE fields always contain zeros), so get
+ * it from the logical block count and size fields for
+ * compatibility
+ *
+ * (See JESD220C-2_2 Section 14.1.4.6
+ * RPMB Unit Descriptor,* offset 13h, 4 bytes)
+ */
+ hba->dev_info.rpmb_region_size[0] =
+ (get_unaligned_be64(desc_buf
+ + RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_COUNT)
+ << desc_buf[RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_SIZE])
+ / SZ_128K;
+ }
}
@@ -5963,6 +5975,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba)
hba->auto_bkops_enabled = false;
trace_ufshcd_auto_bkops_state(hba, "Disabled");
+ hba->urgent_bkops_lvl = BKOPS_STATUS_PERF_IMPACT;
hba->is_urgent_bkops_lvl_checked = false;
out:
return err;
@@ -6066,7 +6079,7 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba)
* impacted or critical. Handle these device by determining their urgent
* bkops status at runtime.
*/
- if (curr_status < BKOPS_STATUS_PERF_IMPACT) {
+ if ((curr_status > BKOPS_STATUS_NO_OP) && (curr_status < BKOPS_STATUS_PERF_IMPACT)) {
dev_err(hba->dev, "%s: device raised urgent BKOPS exception for bkops status %d\n",
__func__, curr_status);
/* update the current status as the urgent bkops level */
@@ -7097,7 +7110,7 @@ static irqreturn_t ufshcd_handle_mcq_cq_events(struct ufs_hba *hba)
ret = ufshcd_vops_get_outstanding_cqs(hba, &outstanding_cqs);
if (ret)
- outstanding_cqs = (1U << hba->nr_hw_queues) - 1;
+ outstanding_cqs = (1ULL << hba->nr_hw_queues) - 1;
/* Exclude the poll queues */
nr_queues = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL];
@@ -10179,7 +10192,15 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
} else {
dev_err(hba->dev, "%s: hibern8 exit failed %d\n",
__func__, ret);
- goto vendor_suspend;
+ /*
+ * If the h8 exit fails during the runtime resume
+ * process, it becomes stuck and cannot be recovered
+ * through the error handler. To fix this, use link
+ * recovery instead of the error handler.
+ */
+ ret = ufshcd_link_recovery(hba);
+ if (ret)
+ goto vendor_suspend;
}
} else if (ufshcd_is_link_off(hba)) {
/*
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 8e89cc5b2820..fb857faaf0d6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -47,6 +47,7 @@
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <uapi/linux/rseq.h>
+#include <linux/rseq.h>
#include <asm/param.h>
#include <asm/page.h>
@@ -286,7 +287,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
}
#ifdef CONFIG_RSEQ
NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
- NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
+ NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align());
#endif
#undef NEW_AUX_ENT
/* AT_NULL is zero; clear the rest too */
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c284f48cfae4..2a886bece810 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3340,7 +3340,6 @@ again:
btrfs_abort_transaction(trans, ret);
goto out_put;
}
- WARN_ON(ret);
/* We've already setup this transaction, go ahead and exit */
if (block_group->cache_generation == trans->transid &&
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index d97bbbd045e0..56ff8afe9a22 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1657,7 +1657,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
"failed to add delayed dir index item, root: %llu, inode: %llu, index: %llu, error: %d",
- index, btrfs_root_id(node->root), node->inode_id, ret);
+ btrfs_root_id(node->root), node->inode_id, index, ret);
btrfs_delayed_item_release_metadata(dir->root, item);
btrfs_release_delayed_item(item);
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f6fa15a1193f..e4fad777b034 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1994,7 +1994,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
int level = btrfs_super_log_root_level(disk_super);
if (unlikely(fs_devices->rw_devices == 0)) {
- btrfs_warn(fs_info, "log replay required on RO media");
+ btrfs_err(fs_info, "log replay required on RO media");
return -EIO;
}
@@ -2008,9 +2008,9 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
check.owner_root = BTRFS_TREE_LOG_OBJECTID;
log_tree_root->node = read_tree_block(fs_info, bytenr, &check);
if (IS_ERR(log_tree_root->node)) {
- btrfs_warn(fs_info, "failed to read log tree");
ret = PTR_ERR(log_tree_root->node);
log_tree_root->node = NULL;
+ btrfs_err(fs_info, "failed to read log tree with error: %d", ret);
btrfs_put_root(log_tree_root);
return ret;
}
@@ -2023,9 +2023,9 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
btrfs_put_root(log_tree_root);
- if (ret) {
- btrfs_handle_fs_error(fs_info, ret,
- "Failed to recover log tree");
+ if (unlikely(ret)) {
+ ASSERT(BTRFS_FS_ERROR(fs_info) != 0);
+ btrfs_err(fs_info, "failed to recover log trees with error: %d", ret);
return ret;
}
@@ -2972,7 +2972,6 @@ static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
if (IS_ERR(task)) {
/* fs_info->update_uuid_tree_gen remains 0 in all error case */
- btrfs_warn(fs_info, "failed to start uuid_rescan task");
up(&fs_info->uuid_tree_rescan_sem);
return PTR_ERR(task);
}
@@ -3188,7 +3187,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
btrfs_err(fs_info,
"cannot mount because of unknown incompat features (0x%llx)",
- incompat);
+ incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP);
return -EINVAL;
}
@@ -3220,7 +3219,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
if (compat_ro_unsupp && is_rw_mount) {
btrfs_err(fs_info,
"cannot mount read-write because of unknown compat_ro features (0x%llx)",
- compat_ro);
+ compat_ro_unsupp);
return -EINVAL;
}
@@ -3233,7 +3232,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
btrfs_err(fs_info,
"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
- compat_ro);
+ compat_ro_unsupp);
return -EINVAL;
}
@@ -3642,7 +3641,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
if (IS_ERR(fs_info->fs_root)) {
ret = PTR_ERR(fs_info->fs_root);
- btrfs_warn(fs_info, "failed to read fs tree: %d", ret);
+ btrfs_err(fs_info, "failed to read fs tree: %d", ret);
fs_info->fs_root = NULL;
goto fail_qgroup;
}
@@ -3663,8 +3662,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
btrfs_info(fs_info, "checking UUID tree");
ret = btrfs_check_uuid_tree(fs_info);
if (ret) {
- btrfs_warn(fs_info,
- "failed to check the UUID tree: %d", ret);
+ btrfs_err(fs_info, "failed to check the UUID tree: %d", ret);
close_ctree(fs_info);
return ret;
}
@@ -4399,9 +4397,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*/
btrfs_flush_workqueue(fs_info->delayed_workers);
- ret = btrfs_commit_super(fs_info);
- if (ret)
- btrfs_err(fs_info, "commit super ret %d", ret);
+ /*
+ * If the filesystem is shutdown, then an attempt to commit the
+ * super block (or any write) will just fail. Since we freeze
+ * the filesystem before shutting it down, the filesystem is in
+ * a consistent state and we don't need to commit super blocks.
+ */
+ if (!btrfs_is_shutdown(fs_info)) {
+ ret = btrfs_commit_super(fs_info);
+ if (ret)
+ btrfs_err(fs_info, "commit super block returned %d", ret);
+ }
}
kthread_stop(fs_info->transaction_kthread);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 03cf9f242c70..b0d9baf5b412 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2933,9 +2933,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
while (!TRANS_ABORTED(trans) && cached_state) {
struct extent_state *next_state;
- if (btrfs_test_opt(fs_info, DISCARD_SYNC))
+ if (btrfs_test_opt(fs_info, DISCARD_SYNC)) {
ret = btrfs_discard_extent(fs_info, start,
end + 1 - start, NULL, true);
+ if (ret) {
+ btrfs_warn(fs_info,
+ "discard failed for extent [%llu, %llu]: errno=%d %s",
+ start, end, ret, btrfs_decode_error(ret));
+ }
+ }
next_state = btrfs_next_extent_state(unpin, cached_state);
btrfs_clear_extent_dirty(unpin, start, end, &cached_state);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6efb543f1c24..a11fcc9e9f50 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1392,10 +1392,25 @@ static int cow_one_range(struct btrfs_inode *inode, struct folio *locked_folio,
return ret;
free_reserved:
+ /*
+ * If we have reserved an extent for the current range and failed to
+ * create the respective extent map or ordered extent, it means that
+ * when we reserved the extent we decremented the extent's size from
+ * the data space_info's bytes_may_use counter and
+ * incremented the space_info's bytes_reserved counter by the same
+ * amount.
+ *
+ * We must make sure extent_clear_unlock_delalloc() does not try
+ * to decrement again the data space_info's bytes_may_use counter, which
+ * will be handled by btrfs_free_reserved_extent().
+ *
+ * Therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV, but only
+ * EXTENT_CLEAR_META_RESV.
+ */
extent_clear_unlock_delalloc(inode, file_offset, cur_end, locked_folio, cached,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
- EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
+ EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
btrfs_qgroup_free_data(inode, NULL, file_offset, cur_len, NULL);
@@ -4764,7 +4779,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
- btrfs_root_id(root));
+ btrfs_root_id(dest));
ret = -EPERM;
goto out_up_write;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ae2173235c4d..a1fd44c44ecf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4581,7 +4581,7 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
{
struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
struct extent_io_tree *io_tree = &inode->io_tree;
- struct page **pages;
+ struct page **pages = NULL;
struct btrfs_uring_priv *priv = NULL;
unsigned long nr_pages;
int ret;
@@ -4639,6 +4639,11 @@ out_fail:
btrfs_unlock_extent(io_tree, start, lockend, &cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
kfree(priv);
+ for (int i = 0; i < nr_pages; i++) {
+ if (pages[i])
+ __free_page(pages[i]);
+ }
+ kfree(pages);
return ret;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 3cdd9755dc52..3b2a6517d0b5 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -370,7 +370,7 @@ static bool squota_check_parent_usage(struct btrfs_fs_info *fs_info, struct btrf
nr_members++;
}
mismatch = (parent->excl != excl_sum || parent->rfer != rfer_sum ||
- parent->excl_cmpr != excl_cmpr_sum || parent->rfer_cmpr != excl_cmpr_sum);
+ parent->excl_cmpr != excl_cmpr_sum || parent->rfer_cmpr != rfer_cmpr_sum);
WARN(mismatch,
"parent squota qgroup %hu/%llu has mismatched usage from its %d members. "
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 95db7c48fbad..a330d8624b83 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4723,6 +4723,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map,
ret = btrfs_remove_dev_extents(trans, chunk_map);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
return ret;
}
@@ -4732,6 +4733,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map,
if (unlikely(ret)) {
mutex_unlock(&trans->fs_info->chunk_mutex);
btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
return ret;
}
}
@@ -4750,6 +4752,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map,
ret = remove_chunk_stripes(trans, chunk_map, path);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
return ret;
}
@@ -5982,6 +5985,9 @@ static int remove_range_from_remap_tree(struct btrfs_trans_handle *trans,
struct btrfs_block_group *dest_bg;
dest_bg = btrfs_lookup_block_group(fs_info, new_addr);
+ if (unlikely(!dest_bg))
+ return -EUCLEAN;
+
adjust_block_group_remap_bytes(trans, dest_bg, -overlap_length);
btrfs_put_block_group(dest_bg);
ret = btrfs_add_to_free_space_tree(trans,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 81022d912abb..bc94bbc00772 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -743,7 +743,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
btrfs_warn_rl(fs_info,
"scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU",
logical, stripe->mirror_num,
- header->fsid, fs_info->fs_devices->fsid);
+ header->fsid, fs_info->fs_devices->metadata_uuid);
return;
}
if (memcmp(header->chunk_tree_uuid, fs_info->chunk_tree_uuid,
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 452394b34d01..ac4c4573ee39 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1740,7 +1740,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
objectid > BTRFS_LAST_FREE_OBJECTID)) {
extent_err(leaf, slot,
"invalid extent data backref objectid value %llu",
- root);
+ objectid);
return -EUCLEAN;
}
if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
@@ -1921,7 +1921,7 @@ static int check_dev_extent_item(const struct extent_buffer *leaf,
if (unlikely(prev_key->offset + prev_len > key->offset)) {
generic_err(leaf, slot,
"dev extent overlap, prev offset %llu len %llu current offset %llu",
- prev_key->objectid, prev_len, key->offset);
+ prev_key->offset, prev_len, key->offset);
return -EUCLEAN;
}
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6fb0c4cd50ff..648bb09fc416 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6907,7 +6907,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
ret = btrfs_translate_remap(fs_info, &new_logical, length);
if (ret)
- return ret;
+ goto out;
if (new_logical != logical) {
btrfs_free_chunk_map(map);
@@ -6921,8 +6921,10 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}
num_copies = btrfs_chunk_map_num_copies(map);
- if (io_geom.mirror_num > num_copies)
- return -EINVAL;
+ if (io_geom.mirror_num > num_copies) {
+ ret = -EINVAL;
+ goto out;
+ }
map_offset = logical - map->start;
io_geom.raid56_full_stripe_start = (u64)-1;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index bc82083e420a..00f0efaf12b2 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -80,18 +80,27 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
{
struct iomap_folio_state *ifs = folio->private;
unsigned long flags;
- bool uptodate = true;
+ bool mark_uptodate = true;
if (folio_test_uptodate(folio))
return;
if (ifs) {
spin_lock_irqsave(&ifs->state_lock, flags);
- uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
+ /*
+ * If a read with bytes pending is in progress, we must not call
+ * folio_mark_uptodate(). The read completion path
+ * (iomap_read_end()) will call folio_end_read(), which uses XOR
+ * semantics to set the uptodate bit. If we set it here, the XOR
+ * in folio_end_read() will clear it, leaving the folio not
+ * uptodate.
+ */
+ mark_uptodate = ifs_set_range_uptodate(folio, ifs, off, len) &&
+ !ifs->read_bytes_pending;
spin_unlock_irqrestore(&ifs->state_lock, flags);
}
- if (uptodate)
+ if (mark_uptodate)
folio_mark_uptodate(folio);
}
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 95254aa1b654..e911daedff65 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -87,6 +87,19 @@ static inline enum fserror_type iomap_dio_err_type(const struct iomap_dio *dio)
return FSERR_DIRECTIO_READ;
}
+static inline bool should_report_dio_fserror(const struct iomap_dio *dio)
+{
+ switch (dio->error) {
+ case 0:
+ case -EAGAIN:
+ case -ENOTBLK:
+ /* don't send fsnotify for success or magic retry codes */
+ return false;
+ default:
+ return true;
+ }
+}
+
ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
const struct iomap_dio_ops *dops = dio->dops;
@@ -96,7 +109,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (dops && dops->end_io)
ret = dops->end_io(iocb, dio->size, ret, dio->flags);
- if (dio->error)
+ if (should_report_dio_fserror(dio))
fserror_report_io(file_inode(iocb->ki_filp),
iomap_dio_err_type(dio), offset, dio->size,
dio->error, GFP_NOFS);
diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c
index 4d1ef8a2cee9..60546fa14dfe 100644
--- a/fs/iomap/ioend.c
+++ b/fs/iomap/ioend.c
@@ -215,17 +215,18 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
switch (wpc->iomap.type) {
- case IOMAP_INLINE:
- WARN_ON_ONCE(1);
- return -EIO;
+ case IOMAP_UNWRITTEN:
+ ioend_flags |= IOMAP_IOEND_UNWRITTEN;
+ break;
+ case IOMAP_MAPPED:
+ break;
case IOMAP_HOLE:
return map_len;
default:
- break;
+ WARN_ON_ONCE(1);
+ return -EIO;
}
- if (wpc->iomap.type == IOMAP_UNWRITTEN)
- ioend_flags |= IOMAP_IOEND_UNWRITTEN;
if (wpc->iomap.flags & IOMAP_F_SHARED)
ioend_flags |= IOMAP_IOEND_SHARED;
if (folio_test_dropbehind(folio))
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index a9d1c3b2c084..dd1451bf7543 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -10,6 +10,202 @@
#include "internal.h"
/*
+ * Perform the cleanup rituals after an unbuffered write is complete.
+ */
+static void netfs_unbuffered_write_done(struct netfs_io_request *wreq)
+{
+ struct netfs_inode *ictx = netfs_inode(wreq->inode);
+
+ _enter("R=%x", wreq->debug_id);
+
+ /* Okay, declare that all I/O is complete. */
+ trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
+
+ if (!wreq->error)
+ netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
+
+ if (wreq->origin == NETFS_DIO_WRITE &&
+ wreq->mapping->nrpages) {
+ /* mmap may have got underfoot and we may now have folios
+ * locally covering the region we just wrote. Attempt to
+ * discard the folios, but leave in place any modified locally.
+ * ->write_iter() is prevented from interfering by the DIO
+ * counter.
+ */
+ pgoff_t first = wreq->start >> PAGE_SHIFT;
+ pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
+
+ invalidate_inode_pages2_range(wreq->mapping, first, last);
+ }
+
+ if (wreq->origin == NETFS_DIO_WRITE)
+ inode_dio_end(wreq->inode);
+
+ _debug("finished");
+ netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
+ /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
+
+ if (wreq->iocb) {
+ size_t written = umin(wreq->transferred, wreq->len);
+
+ wreq->iocb->ki_pos += written;
+ if (wreq->iocb->ki_complete) {
+ trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
+ wreq->iocb->ki_complete(wreq->iocb, wreq->error ?: written);
+ }
+ wreq->iocb = VFS_PTR_POISON;
+ }
+
+ netfs_clear_subrequests(wreq);
+}
+
+/*
+ * Collect the subrequest results of unbuffered write subrequests.
+ */
+static void netfs_unbuffered_write_collect(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ struct netfs_io_subrequest *subreq)
+{
+ trace_netfs_collect_sreq(wreq, subreq);
+
+ spin_lock(&wreq->lock);
+ list_del_init(&subreq->rreq_link);
+ spin_unlock(&wreq->lock);
+
+ wreq->transferred += subreq->transferred;
+ iov_iter_advance(&wreq->buffer.iter, subreq->transferred);
+
+ stream->collected_to = subreq->start + subreq->transferred;
+ wreq->collected_to = stream->collected_to;
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
+
+ trace_netfs_collect_stream(wreq, stream);
+ trace_netfs_collect_state(wreq, wreq->collected_to, 0);
+}
+
+/*
+ * Write data to the server without going through the pagecache and without
+ * writing it to the local cache. We dispatch the subrequests serially and
+ * wait for each to complete before dispatching the next, lest we leave a gap
+ * in the data written due to a failure such as ENOSPC. We could, however
+ * attempt to do preparation such as content encryption for the next subreq
+ * whilst the current is in progress.
+ */
+static int netfs_unbuffered_write(struct netfs_io_request *wreq)
+{
+ struct netfs_io_subrequest *subreq = NULL;
+ struct netfs_io_stream *stream = &wreq->io_streams[0];
+ int ret;
+
+ _enter("%llx", wreq->len);
+
+ if (wreq->origin == NETFS_DIO_WRITE)
+ inode_dio_begin(wreq->inode);
+
+ stream->collected_to = wreq->start;
+
+ for (;;) {
+ bool retry = false;
+
+ if (!subreq) {
+ netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred);
+ subreq = stream->construct;
+ stream->construct = NULL;
+ stream->front = NULL;
+ }
+
+ /* Check if (re-)preparation failed. */
+ if (unlikely(test_bit(NETFS_SREQ_FAILED, &subreq->flags))) {
+ netfs_write_subrequest_terminated(subreq, subreq->error);
+ wreq->error = subreq->error;
+ break;
+ }
+
+ iov_iter_truncate(&subreq->io_iter, wreq->len - wreq->transferred);
+ if (!iov_iter_count(&subreq->io_iter))
+ break;
+
+ subreq->len = netfs_limit_iter(&subreq->io_iter, 0,
+ stream->sreq_max_len,
+ stream->sreq_max_segs);
+ iov_iter_truncate(&subreq->io_iter, subreq->len);
+ stream->submit_extendable_to = subreq->len;
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+ stream->issue_write(subreq);
+
+ /* Async, need to wait. */
+ netfs_wait_for_in_progress_stream(wreq, stream);
+
+ if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+ retry = true;
+ } else if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
+ ret = subreq->error;
+ wreq->error = ret;
+ netfs_see_subrequest(subreq, netfs_sreq_trace_see_failed);
+ subreq = NULL;
+ break;
+ }
+ ret = 0;
+
+ if (!retry) {
+ netfs_unbuffered_write_collect(wreq, stream, subreq);
+ subreq = NULL;
+ if (wreq->transferred >= wreq->len)
+ break;
+ if (!wreq->iocb && signal_pending(current)) {
+ ret = wreq->transferred ? -EINTR : -ERESTARTSYS;
+ trace_netfs_rreq(wreq, netfs_rreq_trace_intr);
+ break;
+ }
+ continue;
+ }
+
+ /* We need to retry the last subrequest, so first reset the
+ * iterator, taking into account what, if anything, we managed
+ * to transfer.
+ */
+ subreq->error = -EAGAIN;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+ if (subreq->transferred > 0)
+ iov_iter_advance(&wreq->buffer.iter, subreq->transferred);
+
+ if (stream->source == NETFS_UPLOAD_TO_SERVER &&
+ wreq->netfs_ops->retry_request)
+ wreq->netfs_ops->retry_request(wreq, stream);
+
+ __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ __clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
+ __clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ subreq->io_iter = wreq->buffer.iter;
+ subreq->start = wreq->start + wreq->transferred;
+ subreq->len = wreq->len - wreq->transferred;
+ subreq->transferred = 0;
+ subreq->retry_count += 1;
+ stream->sreq_max_len = UINT_MAX;
+ stream->sreq_max_segs = INT_MAX;
+
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ stream->prepare_write(subreq);
+
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ netfs_stat(&netfs_n_wh_retry_write_subreq);
+ }
+
+ netfs_unbuffered_write_done(wreq);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+static void netfs_unbuffered_write_async(struct work_struct *work)
+{
+ struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
+
+ netfs_unbuffered_write(wreq);
+ netfs_put_request(wreq, netfs_rreq_trace_put_complete);
+}
+
+/*
* Perform an unbuffered write where we may have to do an RMW operation on an
* encrypted file. This can also be used for direct I/O writes.
*/
@@ -70,35 +266,35 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
*/
wreq->buffer.iter = *iter;
}
+
+ wreq->len = iov_iter_count(&wreq->buffer.iter);
}
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
- if (async)
- __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
/* Copy the data into the bounce buffer and encrypt it. */
// TODO
/* Dispatch the write. */
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
- if (async)
- wreq->iocb = iocb;
- wreq->len = iov_iter_count(&wreq->buffer.iter);
- ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
- if (ret < 0) {
- _debug("begin = %zd", ret);
- goto out;
- }
- if (!async) {
- ret = netfs_wait_for_write(wreq);
- if (ret > 0)
- iocb->ki_pos += ret;
- } else {
+ if (async) {
+ INIT_WORK(&wreq->work, netfs_unbuffered_write_async);
+ wreq->iocb = iocb;
+ queue_work(system_dfl_wq, &wreq->work);
ret = -EIOCBQUEUED;
+ } else {
+ ret = netfs_unbuffered_write(wreq);
+ if (ret < 0) {
+ _debug("begin = %zd", ret);
+ } else {
+ iocb->ki_pos += wreq->transferred;
+ ret = wreq->transferred ?: wreq->error;
+ }
+
+ netfs_put_request(wreq, netfs_rreq_trace_put_complete);
}
-out:
netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 4319611f5354..d436e20d3418 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -198,6 +198,9 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
struct file *file,
loff_t start,
enum netfs_io_origin origin);
+void netfs_prepare_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start);
void netfs_reissue_write(struct netfs_io_stream *stream,
struct netfs_io_subrequest *subreq,
struct iov_iter *source);
@@ -212,7 +215,6 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio **writethrough_cache);
ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
struct folio *writethrough_cache);
-int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
/*
* write_retry.c
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 61eab34ea67e..83eb3dc1adf8 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -399,27 +399,6 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
ictx->ops->invalidate_cache(wreq);
}
- if ((wreq->origin == NETFS_UNBUFFERED_WRITE ||
- wreq->origin == NETFS_DIO_WRITE) &&
- !wreq->error)
- netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
-
- if (wreq->origin == NETFS_DIO_WRITE &&
- wreq->mapping->nrpages) {
- /* mmap may have got underfoot and we may now have folios
- * locally covering the region we just wrote. Attempt to
- * discard the folios, but leave in place any modified locally.
- * ->write_iter() is prevented from interfering by the DIO
- * counter.
- */
- pgoff_t first = wreq->start >> PAGE_SHIFT;
- pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
- invalidate_inode_pages2_range(wreq->mapping, first, last);
- }
-
- if (wreq->origin == NETFS_DIO_WRITE)
- inode_dio_end(wreq->inode);
-
_debug("finished");
netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 34894da5a23e..437268f65640 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -154,9 +154,9 @@ EXPORT_SYMBOL(netfs_prepare_write_failed);
* Prepare a write subrequest. We need to allocate a new subrequest
* if we don't have one.
*/
-static void netfs_prepare_write(struct netfs_io_request *wreq,
- struct netfs_io_stream *stream,
- loff_t start)
+void netfs_prepare_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start)
{
struct netfs_io_subrequest *subreq;
struct iov_iter *wreq_iter = &wreq->buffer.iter;
@@ -699,41 +699,6 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c
}
/*
- * Write data to the server without going through the pagecache and without
- * writing it to the local cache.
- */
-int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
-{
- struct netfs_io_stream *upload = &wreq->io_streams[0];
- ssize_t part;
- loff_t start = wreq->start;
- int error = 0;
-
- _enter("%zx", len);
-
- if (wreq->origin == NETFS_DIO_WRITE)
- inode_dio_begin(wreq->inode);
-
- while (len) {
- // TODO: Prepare content encryption
-
- _debug("unbuffered %zx", len);
- part = netfs_advance_write(wreq, upload, start, len, false);
- start += part;
- len -= part;
- rolling_buffer_advance(&wreq->buffer, part);
- if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags))
- netfs_wait_for_paused_write(wreq);
- if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
- break;
- }
-
- netfs_end_issue_write(wreq);
- _leave(" = %d", error);
- return error;
-}
-
-/*
* Write some of a pending folio data back to the server and/or the cache.
*/
static int netfs_write_folio_single(struct netfs_io_request *wreq,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e9acd2cd602c..4cc8a58fa56a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -377,15 +377,15 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
}
/*
- * write_threads - Start NFSD, or report the current number of running threads
+ * write_threads - Start NFSD, or report the configured number of threads
*
* Input:
* buf: ignored
* size: zero
* Output:
* On success: passed-in buffer filled with '\n'-terminated C
- * string numeric value representing the number of
- * running NFSD threads;
+ * string numeric value representing the configured
+ * number of NFSD threads;
* return code is the size in bytes of the string
* On error: return code is zero
*
@@ -399,8 +399,8 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
* Output:
* On success: NFS service is started;
* passed-in buffer filled with '\n'-terminated C
- * string numeric value representing the number of
- * running NFSD threads;
+ * string numeric value representing the configured
+ * number of NFSD threads;
* return code is the size in bytes of the string
* On error: return code is zero or a negative errno value
*/
@@ -430,7 +430,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
}
/*
- * write_pool_threads - Set or report the current number of threads per pool
+ * write_pool_threads - Set or report the configured number of threads per pool
*
* Input:
* buf: ignored
@@ -447,7 +447,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
* Output:
* On success: passed-in buffer filled with '\n'-terminated C
* string containing integer values representing the
- * number of NFSD threads in each pool;
+ * configured number of NFSD threads in each pool;
* return code is the size in bytes of the string
* On error: return code is zero or a negative errno value
*/
@@ -1647,7 +1647,7 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
if (attr)
nn->min_threads = nla_get_u32(attr);
- ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope);
+ ret = nfsd_svc(nrpools, nthreads, net, current_cred(), scope);
if (ret > 0)
ret = 0;
out_unlock:
@@ -1657,7 +1657,7 @@ out_unlock:
}
/**
- * nfsd_nl_threads_get_doit - get the number of running threads
+ * nfsd_nl_threads_get_doit - get the maximum number of running threads
* @skb: reply buffer
* @info: netlink metadata and command arguments
*
@@ -1700,7 +1700,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
- sp->sp_nrthreads);
+ sp->sp_nrthrmax);
if (err)
goto err_unlock;
}
@@ -2000,7 +2000,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
}
ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0,
- get_current_cred());
+ current_cred());
/* always save the latest error */
if (ret < 0)
err = ret;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 0887ee601d3c..4a04208393b8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -239,12 +239,13 @@ static void nfsd_net_free(struct percpu_ref *ref)
int nfsd_nrthreads(struct net *net)
{
- int rv = 0;
+ int i, rv = 0;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
mutex_lock(&nfsd_mutex);
if (nn->nfsd_serv)
- rv = nn->nfsd_serv->sv_nrthreads;
+ for (i = 0; i < nn->nfsd_serv->sv_nrpools; ++i)
+ rv += nn->nfsd_serv->sv_pools[i].sp_nrthrmax;
mutex_unlock(&nfsd_mutex);
return rv;
}
@@ -659,7 +660,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
if (serv)
for (i = 0; i < serv->sv_nrpools && i < n; i++)
- nthreads[i] = serv->sv_pools[i].sp_nrthreads;
+ nthreads[i] = serv->sv_pools[i].sp_nrthrmax;
return 0;
}
diff --git a/fs/nsfs.c b/fs/nsfs.c
index db91de208645..c215878d55e8 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -199,6 +199,17 @@ static bool nsfs_ioctl_valid(unsigned int cmd)
return false;
}
+static bool may_use_nsfs_ioctl(unsigned int cmd)
+{
+ switch (_IOC_NR(cmd)) {
+ case _IOC_NR(NS_MNT_GET_NEXT):
+ fallthrough;
+ case _IOC_NR(NS_MNT_GET_PREV):
+ return may_see_all_namespaces();
+ }
+ return true;
+}
+
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
@@ -214,6 +225,8 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
if (!nsfs_ioctl_valid(ioctl))
return -ENOIOCTLCMD;
+ if (!may_use_nsfs_ioctl(ioctl))
+ return -EPERM;
ns = get_proc_ns(file_inode(filp));
switch (ioctl) {
@@ -614,7 +627,7 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
return ERR_PTR(-EOPNOTSUPP);
}
- if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) {
+ if (owning_ns && !may_see_all_namespaces()) {
ns->ops->put(ns);
return ERR_PTR(-EPERM);
}
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index c327c246a9b4..04bb95091f49 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -118,7 +118,7 @@ static const char *path_no_prefix(struct cifs_sb_info *cifs_sb,
if (!*path)
return path;
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
+ if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH) &&
cifs_sb->prepath) {
len = strlen(cifs_sb->prepath) + 1;
if (unlikely(len > strlen(path)))
diff --git a/fs/smb/client/cifs_fs_sb.h b/fs/smb/client/cifs_fs_sb.h
index 5e8d163cb5f8..84e7e366b0ff 100644
--- a/fs/smb/client/cifs_fs_sb.h
+++ b/fs/smb/client/cifs_fs_sb.h
@@ -55,7 +55,7 @@ struct cifs_sb_info {
struct nls_table *local_nls;
struct smb3_fs_context *ctx;
atomic_t active;
- unsigned int mnt_cifs_flags;
+ atomic_t mnt_cifs_flags;
struct delayed_work prune_tlinks;
struct rcu_head rcu;
diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h
index b51ce64fcccf..147496ac9f9f 100644
--- a/fs/smb/client/cifs_ioctl.h
+++ b/fs/smb/client/cifs_ioctl.h
@@ -122,11 +122,3 @@ struct smb3_notify_info {
#define CIFS_GOING_FLAGS_DEFAULT 0x0 /* going down */
#define CIFS_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define CIFS_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
-
-static inline bool cifs_forced_shutdown(struct cifs_sb_info *sbi)
-{
- if (CIFS_MOUNT_SHUTDOWN & sbi->mnt_cifs_flags)
- return true;
- else
- return false;
-}
diff --git a/fs/smb/client/cifs_unicode.c b/fs/smb/client/cifs_unicode.c
index e7891b4406f2..e2edc207cef2 100644
--- a/fs/smb/client/cifs_unicode.c
+++ b/fs/smb/client/cifs_unicode.c
@@ -11,20 +11,6 @@
#include "cifsglob.h"
#include "cifs_debug.h"
-int cifs_remap(struct cifs_sb_info *cifs_sb)
-{
- int map_type;
-
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
- map_type = SFM_MAP_UNI_RSVD;
- else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
- map_type = SFU_MAP_UNI_RSVD;
- else
- map_type = NO_MAP_UNI_RSVD;
-
- return map_type;
-}
-
/* Convert character using the SFU - "Services for Unix" remapping range */
static bool
convert_sfu_char(const __u16 src_char, char *target)
diff --git a/fs/smb/client/cifs_unicode.h b/fs/smb/client/cifs_unicode.h
index 9249db3b78c3..3e9cd9acf0a9 100644
--- a/fs/smb/client/cifs_unicode.h
+++ b/fs/smb/client/cifs_unicode.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/nls.h>
#include "../../nls/nls_ucs2_utils.h"
+#include "cifsglob.h"
/*
* Macs use an older "SFM" mapping of the symbols above. Fortunately it does
@@ -65,10 +66,21 @@ char *cifs_strndup_from_utf16(const char *src, const int maxlen,
const struct nls_table *codepage);
int cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
const struct nls_table *cp, int map_chars);
-int cifs_remap(struct cifs_sb_info *cifs_sb);
__le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
int *utf16_len, const struct nls_table *cp,
int remap);
wchar_t cifs_toupper(wchar_t in);
+static inline int cifs_remap(const struct cifs_sb_info *cifs_sb)
+{
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
+
+ if (sbflags & CIFS_MOUNT_MAP_SFM_CHR)
+ return SFM_MAP_UNI_RSVD;
+ if (sbflags & CIFS_MOUNT_MAP_SPECIAL_CHR)
+ return SFU_MAP_UNI_RSVD;
+
+ return NO_MAP_UNI_RSVD;
+}
+
#endif /* _CIFS_UNICODE_H */
diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c
index 6fa12c901c14..f4cb3018a358 100644
--- a/fs/smb/client/cifsacl.c
+++ b/fs/smb/client/cifsacl.c
@@ -356,7 +356,7 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid,
psid->num_subauth, SID_MAX_SUB_AUTHORITIES);
}
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) ||
+ if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_UID_FROM_ACL) ||
(cifs_sb_master_tcon(cifs_sb)->posix_extensions)) {
uint32_t unix_id;
bool is_group;
@@ -1612,7 +1612,8 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
struct smb_acl *dacl_ptr = NULL;
struct smb_ntsd *pntsd = NULL; /* acl obtained from server */
struct smb_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
+ unsigned int sbflags;
struct tcon_link *tlink;
struct smb_version_operations *ops;
bool mode_from_sid, id_from_sid;
@@ -1643,15 +1644,9 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
return rc;
}
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
- mode_from_sid = true;
- else
- mode_from_sid = false;
-
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
- id_from_sid = true;
- else
- id_from_sid = false;
+ sbflags = cifs_sb_flags(cifs_sb);
+ mode_from_sid = sbflags & CIFS_MOUNT_MODE_FROM_SID;
+ id_from_sid = sbflags & CIFS_MOUNT_UID_FROM_ACL;
/* Potentially, five new ACEs can be added to the ACL for U,G,O mapping */
if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 99b04234a08e..427558404aa5 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -226,16 +226,18 @@ cifs_sb_deactive(struct super_block *sb)
static int
cifs_read_super(struct super_block *sb)
{
- struct inode *inode;
struct cifs_sb_info *cifs_sb;
struct cifs_tcon *tcon;
+ unsigned int sbflags;
struct timespec64 ts;
+ struct inode *inode;
int rc = 0;
cifs_sb = CIFS_SB(sb);
tcon = cifs_sb_master_tcon(cifs_sb);
+ sbflags = cifs_sb_flags(cifs_sb);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
+ if (sbflags & CIFS_MOUNT_POSIXACL)
sb->s_flags |= SB_POSIXACL;
if (tcon->snapshot_time)
@@ -311,7 +313,7 @@ cifs_read_super(struct super_block *sb)
}
#ifdef CONFIG_CIFS_NFSD_EXPORT
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
+ if (sbflags & CIFS_MOUNT_SERVER_INUM) {
cifs_dbg(FYI, "export ops supported\n");
sb->s_export_op = &cifs_export_ops;
}
@@ -389,8 +391,7 @@ statfs_out:
static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
{
- struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(CIFS_SB(file));
struct TCP_Server_Info *server = tcon->ses->server;
struct inode *inode = file_inode(file);
int rc;
@@ -418,11 +419,9 @@ out_unlock:
static int cifs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
- struct cifs_sb_info *cifs_sb;
+ unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode));
- cifs_sb = CIFS_SB(inode->i_sb);
-
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
+ if (sbflags & CIFS_MOUNT_NO_PERM) {
if ((mask & MAY_EXEC) && !execute_ok(inode))
return -EACCES;
else
@@ -568,15 +567,17 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
static void
cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb)
{
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
+
seq_puts(s, ",cache=");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
+ if (sbflags & CIFS_MOUNT_STRICT_IO)
seq_puts(s, "strict");
- else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
+ else if (sbflags & CIFS_MOUNT_DIRECT_IO)
seq_puts(s, "none");
- else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE)
+ else if (sbflags & CIFS_MOUNT_RW_CACHE)
seq_puts(s, "singleclient"); /* assume only one client access */
- else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE)
+ else if (sbflags & CIFS_MOUNT_RO_CACHE)
seq_puts(s, "ro"); /* read only caching assumed */
else
seq_puts(s, "loose");
@@ -637,6 +638,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct sockaddr *srcaddr;
+ unsigned int sbflags;
+
srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
seq_show_option(s, "vers", tcon->ses->server->vals->version_string);
@@ -670,16 +673,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
(int)(srcaddr->sa_family));
}
+ sbflags = cifs_sb_flags(cifs_sb);
seq_printf(s, ",uid=%u",
from_kuid_munged(&init_user_ns, cifs_sb->ctx->linux_uid));
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
+ if (sbflags & CIFS_MOUNT_OVERR_UID)
seq_puts(s, ",forceuid");
else
seq_puts(s, ",noforceuid");
seq_printf(s, ",gid=%u",
from_kgid_munged(&init_user_ns, cifs_sb->ctx->linux_gid));
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
+ if (sbflags & CIFS_MOUNT_OVERR_GID)
seq_puts(s, ",forcegid");
else
seq_puts(s, ",noforcegid");
@@ -722,53 +726,53 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",unix");
else
seq_puts(s, ",nounix");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+ if (sbflags & CIFS_MOUNT_NO_DFS)
seq_puts(s, ",nodfs");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
+ if (sbflags & CIFS_MOUNT_POSIX_PATHS)
seq_puts(s, ",posixpaths");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
+ if (sbflags & CIFS_MOUNT_SET_UID)
seq_puts(s, ",setuids");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
+ if (sbflags & CIFS_MOUNT_UID_FROM_ACL)
seq_puts(s, ",idsfromsid");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+ if (sbflags & CIFS_MOUNT_SERVER_INUM)
seq_puts(s, ",serverino");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+ if (sbflags & CIFS_MOUNT_RWPIDFORWARD)
seq_puts(s, ",rwpidforward");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL)
+ if (sbflags & CIFS_MOUNT_NOPOSIXBRL)
seq_puts(s, ",forcemand");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
+ if (sbflags & CIFS_MOUNT_NO_XATTR)
seq_puts(s, ",nouser_xattr");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
+ if (sbflags & CIFS_MOUNT_MAP_SPECIAL_CHR)
seq_puts(s, ",mapchars");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
+ if (sbflags & CIFS_MOUNT_MAP_SFM_CHR)
seq_puts(s, ",mapposix");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
+ if (sbflags & CIFS_MOUNT_UNX_EMUL)
seq_puts(s, ",sfu");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ if (sbflags & CIFS_MOUNT_NO_BRL)
seq_puts(s, ",nobrl");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE)
+ if (sbflags & CIFS_MOUNT_NO_HANDLE_CACHE)
seq_puts(s, ",nohandlecache");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
+ if (sbflags & CIFS_MOUNT_MODE_FROM_SID)
seq_puts(s, ",modefromsid");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
+ if (sbflags & CIFS_MOUNT_CIFS_ACL)
seq_puts(s, ",cifsacl");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
+ if (sbflags & CIFS_MOUNT_DYNPERM)
seq_puts(s, ",dynperm");
if (root->d_sb->s_flags & SB_POSIXACL)
seq_puts(s, ",acl");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
+ if (sbflags & CIFS_MOUNT_MF_SYMLINKS)
seq_puts(s, ",mfsymlinks");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
+ if (sbflags & CIFS_MOUNT_FSCACHE)
seq_puts(s, ",fsc");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)
+ if (sbflags & CIFS_MOUNT_NOSSYNC)
seq_puts(s, ",nostrictsync");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+ if (sbflags & CIFS_MOUNT_NO_PERM)
seq_puts(s, ",noperm");
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
+ if (sbflags & CIFS_MOUNT_CIFS_BACKUPUID)
seq_printf(s, ",backupuid=%u",
from_kuid_munged(&init_user_ns,
cifs_sb->ctx->backupuid));
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
+ if (sbflags & CIFS_MOUNT_CIFS_BACKUPGID)
seq_printf(s, ",backupgid=%u",
from_kgid_munged(&init_user_ns,
cifs_sb->ctx->backupgid));
@@ -909,10 +913,10 @@ static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc)
static int cifs_drop_inode(struct inode *inode)
{
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode));
/* no serverino => unconditional eviction */
- return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) ||
+ return !(sbflags & CIFS_MOUNT_SERVER_INUM) ||
inode_generic_drop(inode);
}
@@ -950,7 +954,7 @@ cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb)
char *s, *p;
char sep;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH)
return dget(sb->s_root);
full_path = cifs_build_path_to_root(ctx, cifs_sb,
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 080ea601c209..6f9b6c72962b 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1580,24 +1580,59 @@ CIFS_I(struct inode *inode)
return container_of(inode, struct cifsInodeInfo, netfs.inode);
}
-static inline struct cifs_sb_info *
-CIFS_SB(struct super_block *sb)
+static inline void *cinode_to_fsinfo(struct cifsInodeInfo *cinode)
+{
+ return cinode->netfs.inode.i_sb->s_fs_info;
+}
+
+static inline void *super_to_fsinfo(struct super_block *sb)
{
return sb->s_fs_info;
}
-static inline struct cifs_sb_info *
-CIFS_FILE_SB(struct file *file)
+static inline void *inode_to_fsinfo(struct inode *inode)
+{
+ return inode->i_sb->s_fs_info;
+}
+
+static inline void *file_to_fsinfo(struct file *file)
+{
+ return file_inode(file)->i_sb->s_fs_info;
+}
+
+static inline void *dentry_to_fsinfo(struct dentry *dentry)
+{
+ return dentry->d_sb->s_fs_info;
+}
+
+static inline void *const_dentry_to_fsinfo(const struct dentry *dentry)
{
- return CIFS_SB(file_inode(file)->i_sb);
+ return dentry->d_sb->s_fs_info;
+}
+
+#define CIFS_SB(_ptr) \
+ ((struct cifs_sb_info *) \
+ _Generic((_ptr), \
+ struct cifsInodeInfo * : cinode_to_fsinfo, \
+ const struct dentry * : const_dentry_to_fsinfo, \
+ struct super_block * : super_to_fsinfo, \
+ struct dentry * : dentry_to_fsinfo, \
+ struct inode * : inode_to_fsinfo, \
+ struct file * : file_to_fsinfo)(_ptr))
+
+/*
+ * Use atomic_t for @cifs_sb->mnt_cifs_flags as it is currently accessed
+ * locklessly and may be changed concurrently by mount/remount and reconnect
+ * paths.
+ */
+static inline unsigned int cifs_sb_flags(const struct cifs_sb_info *cifs_sb)
+{
+ return atomic_read(&cifs_sb->mnt_cifs_flags);
}
static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb)
{
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
- return '/';
- else
- return '\\';
+ return (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS) ? '/' : '\\';
}
static inline void
@@ -2314,9 +2349,8 @@ static inline bool __cifs_cache_state_check(struct cifsInodeInfo *cinode,
unsigned int oplock_flags,
unsigned int sb_flags)
{
- struct cifs_sb_info *cifs_sb = CIFS_SB(cinode->netfs.inode.i_sb);
+ unsigned int sflags = cifs_sb_flags(CIFS_SB(cinode));
unsigned int oplock = READ_ONCE(cinode->oplock);
- unsigned int sflags = cifs_sb->mnt_cifs_flags;
return (oplock & oplock_flags) || (sflags & sb_flags);
}
@@ -2336,4 +2370,9 @@ static inline void cifs_reset_oplock(struct cifsInodeInfo *cinode)
WRITE_ONCE(cinode->oplock, 0);
}
+static inline bool cifs_forced_shutdown(const struct cifs_sb_info *sbi)
+{
+ return cifs_sb_flags(sbi) & CIFS_MOUNT_SHUTDOWN;
+}
+
#endif /* _CIFS_GLOB_H */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 33dfe116ca52..3bad2c5c523d 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2167,9 +2167,6 @@ void __cifs_put_smb_ses(struct cifs_ses *ses)
#ifdef CONFIG_KEYS
-/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */
-#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1)
-
/* Populate username and pw fields from keyring if possible */
static int
cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
@@ -2177,6 +2174,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
int rc = 0;
int is_domain = 0;
const char *delim, *payload;
+ size_t desc_sz;
char *desc;
ssize_t len;
struct key *key;
@@ -2185,7 +2183,9 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
struct sockaddr_in6 *sa6;
const struct user_key_payload *upayload;
- desc = kmalloc(CIFSCREDS_DESC_SIZE, GFP_KERNEL);
+ /* "cifs:a:" and "cifs:d:" are the same length; +1 for NUL terminator */
+ desc_sz = strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1;
+ desc = kmalloc(desc_sz, GFP_KERNEL);
if (!desc)
return -ENOMEM;
@@ -2193,11 +2193,11 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
switch (server->dstaddr.ss_family) {
case AF_INET:
sa = (struct sockaddr_in *)&server->dstaddr;
- sprintf(desc, "cifs:a:%pI4", &sa->sin_addr.s_addr);
+ snprintf(desc, desc_sz, "cifs:a:%pI4", &sa->sin_addr.s_addr);
break;
case AF_INET6:
sa6 = (struct sockaddr_in6 *)&server->dstaddr;
- sprintf(desc, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr);
+ snprintf(desc, desc_sz, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr);
break;
default:
cifs_dbg(FYI, "Bad ss_family (%hu)\n",
@@ -2216,7 +2216,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
/* didn't work, try to find a domain key */
- sprintf(desc, "cifs:d:%s", ses->domainName);
+ snprintf(desc, desc_sz, "cifs:d:%s", ses->domainName);
cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
key = request_key(&key_type_logon, desc, "");
if (IS_ERR(key)) {
@@ -2236,7 +2236,6 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
/* find first : in payload */
payload = upayload->data;
delim = strnchr(payload, upayload->datalen, ':');
- cifs_dbg(FYI, "payload=%s\n", payload);
if (!delim) {
cifs_dbg(FYI, "Unable to find ':' in payload (datalen=%d)\n",
upayload->datalen);
@@ -2915,8 +2914,8 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
{
struct cifs_sb_info *old = CIFS_SB(sb);
struct cifs_sb_info *new = mnt_data->cifs_sb;
- unsigned int oldflags = old->mnt_cifs_flags & CIFS_MOUNT_MASK;
- unsigned int newflags = new->mnt_cifs_flags & CIFS_MOUNT_MASK;
+ unsigned int oldflags = cifs_sb_flags(old) & CIFS_MOUNT_MASK;
+ unsigned int newflags = cifs_sb_flags(new) & CIFS_MOUNT_MASK;
if ((sb->s_flags & CIFS_MS_MASK) != (mnt_data->flags & CIFS_MS_MASK))
return 0;
@@ -2971,9 +2970,9 @@ static int match_prepath(struct super_block *sb,
struct smb3_fs_context *ctx = mnt_data->ctx;
struct cifs_sb_info *old = CIFS_SB(sb);
struct cifs_sb_info *new = mnt_data->cifs_sb;
- bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
+ bool old_set = (cifs_sb_flags(old) & CIFS_MOUNT_USE_PREFIX_PATH) &&
old->prepath;
- bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
+ bool new_set = (cifs_sb_flags(new) & CIFS_MOUNT_USE_PREFIX_PATH) &&
new->prepath;
if (tcon->origin_fullpath &&
@@ -3004,7 +3003,7 @@ cifs_match_super(struct super_block *sb, void *data)
cifs_sb = CIFS_SB(sb);
/* We do not want to use a superblock that has been shutdown */
- if (CIFS_MOUNT_SHUTDOWN & cifs_sb->mnt_cifs_flags) {
+ if (cifs_forced_shutdown(cifs_sb)) {
spin_unlock(&cifs_tcp_ses_lock);
return 0;
}
@@ -3469,6 +3468,8 @@ ip_connect(struct TCP_Server_Info *server)
int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb)
{
struct smb3_fs_context *ctx = cifs_sb->ctx;
+ unsigned int sbflags;
+ int rc = 0;
INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
INIT_LIST_HEAD(&cifs_sb->tcon_sb_link);
@@ -3493,17 +3494,16 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb)
}
ctx->local_nls = cifs_sb->local_nls;
- smb3_update_mnt_flags(cifs_sb);
+ sbflags = smb3_update_mnt_flags(cifs_sb);
if (ctx->direct_io)
cifs_dbg(FYI, "mounting share using direct i/o\n");
if (ctx->cache_ro) {
cifs_dbg(VFS, "mounting share with read only caching. Ensure that the share will not be modified while in use.\n");
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RO_CACHE;
+ sbflags |= CIFS_MOUNT_RO_CACHE;
} else if (ctx->cache_rw) {
cifs_dbg(VFS, "mounting share in single client RW caching mode. Ensure that no other systems will be accessing the share.\n");
- cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_RO_CACHE |
- CIFS_MOUNT_RW_CACHE);
+ sbflags |= CIFS_MOUNT_RO_CACHE | CIFS_MOUNT_RW_CACHE;
}
if ((ctx->cifs_acl) && (ctx->dynperm))
@@ -3512,16 +3512,19 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb)
if (ctx->prepath) {
cifs_sb->prepath = kstrdup(ctx->prepath, GFP_KERNEL);
if (cifs_sb->prepath == NULL)
- return -ENOMEM;
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ rc = -ENOMEM;
+ else
+ sbflags |= CIFS_MOUNT_USE_PREFIX_PATH;
}
- return 0;
+ atomic_set(&cifs_sb->mnt_cifs_flags, sbflags);
+ return rc;
}
/* Release all succeed connections */
void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx)
{
+ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
int rc = 0;
if (mnt_ctx->tcon)
@@ -3533,7 +3536,7 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx)
mnt_ctx->ses = NULL;
mnt_ctx->tcon = NULL;
mnt_ctx->server = NULL;
- mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+ atomic_andnot(CIFS_MOUNT_POSIX_PATHS, &cifs_sb->mnt_cifs_flags);
free_xid(mnt_ctx->xid);
}
@@ -3587,19 +3590,23 @@ out:
int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
{
struct TCP_Server_Info *server;
+ struct cifs_tcon *tcon = NULL;
struct cifs_sb_info *cifs_sb;
struct smb3_fs_context *ctx;
- struct cifs_tcon *tcon = NULL;
+ unsigned int sbflags;
int rc = 0;
- if (WARN_ON_ONCE(!mnt_ctx || !mnt_ctx->server || !mnt_ctx->ses || !mnt_ctx->fs_ctx ||
- !mnt_ctx->cifs_sb)) {
- rc = -EINVAL;
- goto out;
+ if (WARN_ON_ONCE(!mnt_ctx))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!mnt_ctx->server || !mnt_ctx->ses ||
+ !mnt_ctx->fs_ctx || !mnt_ctx->cifs_sb)) {
+ mnt_ctx->tcon = NULL;
+ return -EINVAL;
}
server = mnt_ctx->server;
ctx = mnt_ctx->fs_ctx;
cifs_sb = mnt_ctx->cifs_sb;
+ sbflags = cifs_sb_flags(cifs_sb);
/* search for existing tcon to this server share */
tcon = cifs_get_tcon(mnt_ctx->ses, ctx);
@@ -3614,9 +3621,9 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
* path (i.e., do not remap / and \ and do not map any special characters)
*/
if (tcon->posix_extensions) {
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
- cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MAP_SFM_CHR |
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ sbflags |= CIFS_MOUNT_POSIX_PATHS;
+ sbflags &= ~(CIFS_MOUNT_MAP_SFM_CHR |
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
}
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
@@ -3643,12 +3650,11 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
/* do not care if a following call succeed - informational */
if (!tcon->pipe && server->ops->qfs_tcon) {
server->ops->qfs_tcon(mnt_ctx->xid, tcon, cifs_sb);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) {
+ if (sbflags & CIFS_MOUNT_RO_CACHE) {
if (tcon->fsDevInfo.DeviceCharacteristics &
cpu_to_le32(FILE_READ_ONLY_DEVICE))
cifs_dbg(VFS, "mounted to read only share\n");
- else if ((cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_RW_CACHE) == 0)
+ else if (!(sbflags & CIFS_MOUNT_RW_CACHE))
cifs_dbg(VFS, "read only mount of RW share\n");
/* no need to log a RW mount of a typical RW share */
}
@@ -3660,11 +3666,12 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
* Inside cifs_fscache_get_super_cookie it checks
* that we do not get super cookie twice.
*/
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
+ if (sbflags & CIFS_MOUNT_FSCACHE)
cifs_fscache_get_super_cookie(tcon);
out:
mnt_ctx->tcon = tcon;
+ atomic_set(&cifs_sb->mnt_cifs_flags, sbflags);
return rc;
}
@@ -3783,7 +3790,8 @@ int cifs_is_path_remote(struct cifs_mount_ctx *mnt_ctx)
cifs_sb, full_path, tcon->Flags & SMB_SHARE_IS_IN_DFS);
if (rc != 0) {
cifs_server_dbg(VFS, "cannot query dirs between root and final path, enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ atomic_or(CIFS_MOUNT_USE_PREFIX_PATH,
+ &cifs_sb->mnt_cifs_flags);
rc = 0;
}
}
@@ -3863,7 +3871,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
* Force the use of prefix path to support failover on DFS paths that resolve to targets
* that have different prefix paths.
*/
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags);
kfree(cifs_sb->prepath);
cifs_sb->prepath = ctx->prepath;
ctx->prepath = NULL;
@@ -4357,7 +4365,7 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
kuid_t fsuid = current_fsuid();
int err;
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER))
return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
spin_lock(&cifs_sb->tlink_tree_lock);
diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c
index 983132735d72..83f8cf2f8d2b 100644
--- a/fs/smb/client/dfs_cache.c
+++ b/fs/smb/client/dfs_cache.c
@@ -1333,7 +1333,7 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
* Force the use of prefix path to support failover on DFS paths that resolve to targets
* that have different prefix paths.
*/
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags);
refresh_tcon_referral(tcon, true);
return 0;
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index cb10088197d2..953f1fee8cb8 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -82,10 +82,11 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa
const char *tree, int tree_len,
bool prefix)
{
- int dfsplen;
- int pplen = 0;
- struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(direntry);
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
char dirsep = CIFS_DIR_SEP(cifs_sb);
+ int pplen = 0;
+ int dfsplen;
char *s;
if (unlikely(!page))
@@ -96,7 +97,7 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa
else
dfsplen = 0;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+ if (sbflags & CIFS_MOUNT_USE_PREFIX_PATH)
pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0;
s = dentry_path_raw(direntry, page, PATH_MAX);
@@ -123,7 +124,7 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa
if (dfsplen) {
s -= dfsplen;
memcpy(s, tree, dfsplen);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
+ if (sbflags & CIFS_MOUNT_POSIX_PATHS) {
int i;
for (i = 0; i < dfsplen; i++) {
if (s[i] == '\\')
@@ -152,7 +153,7 @@ char *build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page
static int
check_name(struct dentry *direntry, struct cifs_tcon *tcon)
{
- struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(direntry);
int i;
if (unlikely(tcon->fsAttrInfo.MaxPathNameComponentLength &&
@@ -160,7 +161,7 @@ check_name(struct dentry *direntry, struct cifs_tcon *tcon)
le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength)))
return -ENAMETOOLONG;
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS)) {
for (i = 0; i < direntry->d_name.len; i++) {
if (direntry->d_name.name[i] == '\\') {
cifs_dbg(FYI, "Invalid file name\n");
@@ -181,11 +182,12 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
int rc = -ENOENT;
int create_options = CREATE_NOT_DIR;
int desired_access;
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
struct cifs_tcon *tcon = tlink_tcon(tlink);
const char *full_path;
void *page = alloc_dentry_path();
struct inode *newinode = NULL;
+ unsigned int sbflags;
int disposition;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
@@ -365,6 +367,7 @@ retry_open:
* If Open reported that we actually created a file then we now have to
* set the mode if possible.
*/
+ sbflags = cifs_sb_flags(cifs_sb);
if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) {
struct cifs_unix_set_info_args args = {
.mode = mode,
@@ -374,7 +377,7 @@ retry_open:
.device = 0,
};
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+ if (sbflags & CIFS_MOUNT_SET_UID) {
args.uid = current_fsuid();
if (inode->i_mode & S_ISGID)
args.gid = inode->i_gid;
@@ -411,9 +414,9 @@ cifs_create_get_file_info:
if (server->ops->set_lease_key)
server->ops->set_lease_key(newinode, fid);
if ((*oplock & CIFS_CREATE_ACTION) && S_ISREG(newinode->i_mode)) {
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
+ if (sbflags & CIFS_MOUNT_DYNPERM)
newinode->i_mode = mode;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+ if (sbflags & CIFS_MOUNT_SET_UID) {
newinode->i_uid = current_fsuid();
if (inode->i_mode & S_ISGID)
newinode->i_gid = inode->i_gid;
@@ -458,18 +461,20 @@ int
cifs_atomic_open(struct inode *inode, struct dentry *direntry,
struct file *file, unsigned int oflags, umode_t mode)
{
- int rc;
- unsigned int xid;
- struct tcon_link *tlink;
- struct cifs_tcon *tcon;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
+ struct cifs_open_info_data buf = {};
struct TCP_Server_Info *server;
- struct cifs_fid fid = {};
+ struct cifsFileInfo *file_info;
struct cifs_pending_open open;
+ struct cifs_fid fid = {};
+ struct tcon_link *tlink;
+ struct cifs_tcon *tcon;
+ unsigned int sbflags;
+ unsigned int xid;
__u32 oplock;
- struct cifsFileInfo *file_info;
- struct cifs_open_info_data buf = {};
+ int rc;
- if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
return smb_EIO(smb_eio_trace_forced_shutdown);
/*
@@ -499,7 +504,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
inode, direntry, direntry);
- tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
+ tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
rc = PTR_ERR(tlink);
goto out_free_xid;
@@ -536,13 +541,13 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
goto out;
}
- if (file->f_flags & O_DIRECT &&
- CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
- if (CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ sbflags = cifs_sb_flags(cifs_sb);
+ if ((file->f_flags & O_DIRECT) && (sbflags & CIFS_MOUNT_STRICT_IO)) {
+ if (sbflags & CIFS_MOUNT_NO_BRL)
file->f_op = &cifs_file_direct_nobrl_ops;
else
file->f_op = &cifs_file_direct_ops;
- }
+ }
file_info = cifs_new_fileinfo(&fid, file, tlink, oplock, buf.symlink_target);
if (file_info == NULL) {
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 18f31d4eb98d..f3ddcdf406c8 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -270,7 +270,7 @@ static void cifs_begin_writeback(struct netfs_io_request *wreq)
static int cifs_init_request(struct netfs_io_request *rreq, struct file *file)
{
struct cifs_io_request *req = container_of(rreq, struct cifs_io_request, rreq);
- struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode);
struct cifsFileInfo *open_file = NULL;
rreq->rsize = cifs_sb->ctx->rsize;
@@ -281,7 +281,7 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file)
open_file = file->private_data;
rreq->netfs_priv = file->private_data;
req->cfile = cifsFileInfo_get(open_file);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RWPIDFORWARD)
req->pid = req->cfile->pid;
} else if (rreq->origin != NETFS_WRITEBACK) {
WARN_ON_ONCE(1);
@@ -906,7 +906,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
* close because it may cause a error when we open this file
* again and get at least level II oplock.
*/
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_STRICT_IO)
set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
cifs_set_oplock_level(cifsi, 0);
}
@@ -955,11 +955,11 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
int cifs_file_flush(const unsigned int xid, struct inode *inode,
struct cifsFileInfo *cfile)
{
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
struct cifs_tcon *tcon;
int rc;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOSSYNC)
return 0;
if (cfile && (OPEN_FMODE(cfile->f_flags) & FMODE_WRITE)) {
@@ -1015,24 +1015,24 @@ static int cifs_do_truncate(const unsigned int xid, struct dentry *dentry)
int cifs_open(struct inode *inode, struct file *file)
{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
+ struct cifs_open_info_data data = {};
+ struct cifsFileInfo *cfile = NULL;
+ struct TCP_Server_Info *server;
+ struct cifs_pending_open open;
+ bool posix_open_ok = false;
+ struct cifs_fid fid = {};
+ struct tcon_link *tlink;
+ struct cifs_tcon *tcon;
+ const char *full_path;
+ unsigned int sbflags;
int rc = -EACCES;
unsigned int xid;
__u32 oplock;
- struct cifs_sb_info *cifs_sb;
- struct TCP_Server_Info *server;
- struct cifs_tcon *tcon;
- struct tcon_link *tlink;
- struct cifsFileInfo *cfile = NULL;
void *page;
- const char *full_path;
- bool posix_open_ok = false;
- struct cifs_fid fid = {};
- struct cifs_pending_open open;
- struct cifs_open_info_data data = {};
xid = get_xid();
- cifs_sb = CIFS_SB(inode->i_sb);
if (unlikely(cifs_forced_shutdown(cifs_sb))) {
free_xid(xid);
return smb_EIO(smb_eio_trace_forced_shutdown);
@@ -1056,9 +1056,9 @@ int cifs_open(struct inode *inode, struct file *file)
cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
inode, file->f_flags, full_path);
- if (file->f_flags & O_DIRECT &&
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ sbflags = cifs_sb_flags(cifs_sb);
+ if ((file->f_flags & O_DIRECT) && (sbflags & CIFS_MOUNT_STRICT_IO)) {
+ if (sbflags & CIFS_MOUNT_NO_BRL)
file->f_op = &cifs_file_direct_nobrl_ops;
else
file->f_op = &cifs_file_direct_ops;
@@ -1209,7 +1209,7 @@ cifs_relock_file(struct cifsFileInfo *cfile)
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
- struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(cinode);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
@@ -1222,7 +1222,7 @@ cifs_relock_file(struct cifsFileInfo *cfile)
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (cap_unix(tcon->ses) &&
(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
- ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+ ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0))
rc = cifs_push_posix_locks(cfile);
else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
@@ -2011,7 +2011,7 @@ cifs_push_locks(struct cifsFileInfo *cfile)
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
- struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(cinode);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
/* we are going to update can_cache_brlcks here - need a write access */
@@ -2024,7 +2024,7 @@ cifs_push_locks(struct cifsFileInfo *cfile)
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (cap_unix(tcon->ses) &&
(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
- ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+ ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0))
rc = cifs_push_posix_locks(cfile);
else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
@@ -2428,11 +2428,11 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
tcon->ses->server);
- cifs_sb = CIFS_FILE_SB(file);
+ cifs_sb = CIFS_SB(file);
if (cap_unix(tcon->ses) &&
(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
- ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+ ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0))
posix_lck = true;
if (!lock && !unlock) {
@@ -2455,14 +2455,14 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
- int rc, xid;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(file);
+ struct cifsFileInfo *cfile;
int lock = 0, unlock = 0;
bool wait_flag = false;
bool posix_lck = false;
- struct cifs_sb_info *cifs_sb;
struct cifs_tcon *tcon;
- struct cifsFileInfo *cfile;
__u32 type;
+ int rc, xid;
rc = -EACCES;
xid = get_xid();
@@ -2477,12 +2477,11 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
tcon->ses->server);
- cifs_sb = CIFS_FILE_SB(file);
set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
if (cap_unix(tcon->ses) &&
(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
- ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+ ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0))
posix_lck = true;
/*
* BB add code here to normalize offset and length to account for
@@ -2532,11 +2531,11 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
bool fsuid_only)
{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode);
struct cifsFileInfo *open_file = NULL;
- struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
/* only filter by fsuid on multiuser mounts */
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
spin_lock(&cifs_inode->open_file_lock);
@@ -2589,10 +2588,10 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
return rc;
}
- cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
+ cifs_sb = CIFS_SB(cifs_inode);
/* only filter by fsuid on multiuser mounts */
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
spin_lock(&cifs_inode->open_file_lock);
@@ -2787,7 +2786,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct TCP_Server_Info *server;
struct cifsFileInfo *smbfile = file->private_data;
struct inode *inode = file_inode(file);
- struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(file);
rc = file_write_and_wait_range(file, start, end);
if (rc) {
@@ -2801,7 +2800,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
file, datasync);
tcon = tlink_tcon(smbfile->tlink);
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOSSYNC)) {
server = tcon->ses->server;
if (server->ops->flush == NULL) {
rc = -ENOSYS;
@@ -2853,7 +2852,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file->f_mapping->host;
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
ssize_t rc;
rc = netfs_start_io_write(inode);
@@ -2870,7 +2869,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
if (rc <= 0)
goto out;
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) &&
+ if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) &&
(cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
server->vals->exclusive_lock_type, 0,
NULL, CIFS_WRITE_OP))) {
@@ -2893,7 +2892,7 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct cifsInodeInfo *cinode = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
struct cifsFileInfo *cfile = (struct cifsFileInfo *)
iocb->ki_filp->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
@@ -2906,7 +2905,7 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
if (CIFS_CACHE_WRITE(cinode)) {
if (cap_unix(tcon->ses) &&
(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
- ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
+ ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
written = netfs_file_write_iter(iocb, from);
goto out;
}
@@ -2994,7 +2993,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct cifsInodeInfo *cinode = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
struct cifsFileInfo *cfile = (struct cifsFileInfo *)
iocb->ki_filp->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
@@ -3011,7 +3010,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
if (!CIFS_CACHE_READ(cinode))
return netfs_unbuffered_read_iter(iocb, to);
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) {
+ if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0) {
if (iocb->ki_flags & IOCB_DIRECT)
return netfs_unbuffered_read_iter(iocb, to);
return netfs_buffered_read_iter(iocb, to);
@@ -3130,10 +3129,9 @@ bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
if (is_inode_writable(cifsInode) ||
((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
/* This inode is open for write at least once */
- struct cifs_sb_info *cifs_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(cifsInode);
- cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_DIRECT_IO) {
/* since no page cache to corrupt on directio
we can change size safely */
return true;
@@ -3181,7 +3179,7 @@ void cifs_oplock_break(struct work_struct *work)
server = tcon->ses->server;
scoped_guard(spinlock, &cinode->open_file_lock) {
- unsigned int sbflags = cifs_sb->mnt_cifs_flags;
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
cfile->oplock_epoch, &purge_cache);
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 09fe749e97ee..54090739535f 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -2062,161 +2062,160 @@ smb3_cleanup_fs_context(struct smb3_fs_context *ctx)
kfree(ctx);
}
-void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb)
+unsigned int smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb)
{
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
struct smb3_fs_context *ctx = cifs_sb->ctx;
if (ctx->nodfs)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS;
+ sbflags |= CIFS_MOUNT_NO_DFS;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_DFS;
+ sbflags &= ~CIFS_MOUNT_NO_DFS;
if (ctx->noperm)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
+ sbflags |= CIFS_MOUNT_NO_PERM;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_PERM;
+ sbflags &= ~CIFS_MOUNT_NO_PERM;
if (ctx->setuids)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
+ sbflags |= CIFS_MOUNT_SET_UID;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SET_UID;
+ sbflags &= ~CIFS_MOUNT_SET_UID;
if (ctx->setuidfromacl)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL;
+ sbflags |= CIFS_MOUNT_UID_FROM_ACL;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UID_FROM_ACL;
+ sbflags &= ~CIFS_MOUNT_UID_FROM_ACL;
if (ctx->server_ino)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
+ sbflags |= CIFS_MOUNT_SERVER_INUM;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
+ sbflags &= ~CIFS_MOUNT_SERVER_INUM;
if (ctx->remap)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR;
+ sbflags |= CIFS_MOUNT_MAP_SFM_CHR;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SFM_CHR;
+ sbflags &= ~CIFS_MOUNT_MAP_SFM_CHR;
if (ctx->sfu_remap)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
+ sbflags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR;
+ sbflags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR;
if (ctx->no_xattr)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
+ sbflags |= CIFS_MOUNT_NO_XATTR;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_XATTR;
+ sbflags &= ~CIFS_MOUNT_NO_XATTR;
if (ctx->sfu_emul)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
+ sbflags |= CIFS_MOUNT_UNX_EMUL;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UNX_EMUL;
+ sbflags &= ~CIFS_MOUNT_UNX_EMUL;
if (ctx->nobrl)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+ sbflags |= CIFS_MOUNT_NO_BRL;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_BRL;
+ sbflags &= ~CIFS_MOUNT_NO_BRL;
if (ctx->nohandlecache)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE;
+ sbflags |= CIFS_MOUNT_NO_HANDLE_CACHE;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_HANDLE_CACHE;
+ sbflags &= ~CIFS_MOUNT_NO_HANDLE_CACHE;
if (ctx->nostrictsync)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
+ sbflags |= CIFS_MOUNT_NOSSYNC;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOSSYNC;
+ sbflags &= ~CIFS_MOUNT_NOSSYNC;
if (ctx->mand_lock)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
+ sbflags |= CIFS_MOUNT_NOPOSIXBRL;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOPOSIXBRL;
+ sbflags &= ~CIFS_MOUNT_NOPOSIXBRL;
if (ctx->rwpidforward)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
+ sbflags |= CIFS_MOUNT_RWPIDFORWARD;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_RWPIDFORWARD;
+ sbflags &= ~CIFS_MOUNT_RWPIDFORWARD;
if (ctx->mode_ace)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID;
+ sbflags |= CIFS_MOUNT_MODE_FROM_SID;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MODE_FROM_SID;
+ sbflags &= ~CIFS_MOUNT_MODE_FROM_SID;
if (ctx->cifs_acl)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
+ sbflags |= CIFS_MOUNT_CIFS_ACL;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_ACL;
+ sbflags &= ~CIFS_MOUNT_CIFS_ACL;
if (ctx->backupuid_specified)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID;
+ sbflags |= CIFS_MOUNT_CIFS_BACKUPUID;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPUID;
+ sbflags &= ~CIFS_MOUNT_CIFS_BACKUPUID;
if (ctx->backupgid_specified)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID;
+ sbflags |= CIFS_MOUNT_CIFS_BACKUPGID;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPGID;
+ sbflags &= ~CIFS_MOUNT_CIFS_BACKUPGID;
if (ctx->override_uid)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
+ sbflags |= CIFS_MOUNT_OVERR_UID;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_UID;
+ sbflags &= ~CIFS_MOUNT_OVERR_UID;
if (ctx->override_gid)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
+ sbflags |= CIFS_MOUNT_OVERR_GID;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_GID;
+ sbflags &= ~CIFS_MOUNT_OVERR_GID;
if (ctx->dynperm)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
+ sbflags |= CIFS_MOUNT_DYNPERM;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DYNPERM;
+ sbflags &= ~CIFS_MOUNT_DYNPERM;
if (ctx->fsc)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
+ sbflags |= CIFS_MOUNT_FSCACHE;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_FSCACHE;
+ sbflags &= ~CIFS_MOUNT_FSCACHE;
if (ctx->multiuser)
- cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
- CIFS_MOUNT_NO_PERM);
+ sbflags |= CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_NO_PERM;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MULTIUSER;
+ sbflags &= ~CIFS_MOUNT_MULTIUSER;
if (ctx->strict_io)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
+ sbflags |= CIFS_MOUNT_STRICT_IO;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_STRICT_IO;
+ sbflags &= ~CIFS_MOUNT_STRICT_IO;
if (ctx->direct_io)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
+ sbflags |= CIFS_MOUNT_DIRECT_IO;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DIRECT_IO;
+ sbflags &= ~CIFS_MOUNT_DIRECT_IO;
if (ctx->mfsymlinks)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS;
+ sbflags |= CIFS_MOUNT_MF_SYMLINKS;
else
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MF_SYMLINKS;
- if (ctx->mfsymlinks) {
- if (ctx->sfu_emul) {
- /*
- * Our SFU ("Services for Unix") emulation allows now
- * creating new and reading existing SFU symlinks.
- * Older Linux kernel versions were not able to neither
- * read existing nor create new SFU symlinks. But
- * creating and reading SFU style mknod and FIFOs was
- * supported for long time. When "mfsymlinks" and
- * "sfu" are both enabled at the same time, it allows
- * reading both types of symlinks, but will only create
- * them with mfsymlinks format. This allows better
- * Apple compatibility, compatibility with older Linux
- * kernel clients (probably better for Samba too)
- * while still recognizing old Windows style symlinks.
- */
- cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n");
- }
- }
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SHUTDOWN;
+ sbflags &= ~CIFS_MOUNT_MF_SYMLINKS;
- return;
+ if (ctx->mfsymlinks && ctx->sfu_emul) {
+ /*
+ * Our SFU ("Services for Unix") emulation allows now
+ * creating new and reading existing SFU symlinks.
+ * Older Linux kernel versions were not able to neither
+ * read existing nor create new SFU symlinks. But
+ * creating and reading SFU style mknod and FIFOs was
+ * supported for long time. When "mfsymlinks" and
+ * "sfu" are both enabled at the same time, it allows
+ * reading both types of symlinks, but will only create
+ * them with mfsymlinks format. This allows better
+ * Apple compatibility, compatibility with older Linux
+ * kernel clients (probably better for Samba too)
+ * while still recognizing old Windows style symlinks.
+ */
+ cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n");
+ }
+ sbflags &= ~CIFS_MOUNT_SHUTDOWN;
+ atomic_set(&cifs_sb->mnt_cifs_flags, sbflags);
+ return sbflags;
}
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 49b2a6f09ca2..0b64fcb5d302 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -374,7 +374,7 @@ int smb3_fs_context_dup(struct smb3_fs_context *new_ctx,
struct smb3_fs_context *ctx);
int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb,
struct cifs_ses *ses);
-void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
+unsigned int smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
/*
* max deferred close timeout (jiffies) - 2^30
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index d4d3cfeb6c90..3e844c55ab8a 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -40,32 +40,33 @@ static void cifs_set_netfs_context(struct inode *inode)
static void cifs_set_ops(struct inode *inode)
{
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct netfs_inode *ictx = netfs_inode(inode);
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &cifs_file_inode_ops;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
+ if (sbflags & CIFS_MOUNT_DIRECT_IO) {
set_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ if (sbflags & CIFS_MOUNT_NO_BRL)
inode->i_fop = &cifs_file_direct_nobrl_ops;
else
inode->i_fop = &cifs_file_direct_ops;
- } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ } else if (sbflags & CIFS_MOUNT_STRICT_IO) {
+ if (sbflags & CIFS_MOUNT_NO_BRL)
inode->i_fop = &cifs_file_strict_nobrl_ops;
else
inode->i_fop = &cifs_file_strict_ops;
- } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ } else if (sbflags & CIFS_MOUNT_NO_BRL)
inode->i_fop = &cifs_file_nobrl_ops;
else { /* not direct, send byte range locks */
inode->i_fop = &cifs_file_ops;
}
/* check if server can support readahead */
- if (cifs_sb_master_tcon(cifs_sb)->ses->server->max_read <
- PAGE_SIZE + MAX_CIFS_HDR_SIZE)
+ if (tcon->ses->server->max_read < PAGE_SIZE + MAX_CIFS_HDR_SIZE)
inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
else
inode->i_data.a_ops = &cifs_addr_ops;
@@ -194,8 +195,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr,
inode->i_gid = fattr->cf_gid;
/* if dynperm is set, don't clobber existing mode */
- if (inode_state_read(inode) & I_NEW ||
- !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM))
+ if ((inode_state_read(inode) & I_NEW) ||
+ !(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_DYNPERM))
inode->i_mode = fattr->cf_mode;
cifs_i->cifsAttrs = fattr->cf_cifsattrs;
@@ -248,10 +249,8 @@ cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
- return;
-
- fattr->cf_uniqueid = iunique(sb, ROOT_I);
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM))
+ fattr->cf_uniqueid = iunique(sb, ROOT_I);
}
/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */
@@ -259,6 +258,8 @@ void
cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
struct cifs_sb_info *cifs_sb)
{
+ unsigned int sbflags;
+
memset(fattr, 0, sizeof(*fattr));
fattr->cf_uniqueid = le64_to_cpu(info->UniqueId);
fattr->cf_bytes = le64_to_cpu(info->NumOfBytes);
@@ -317,8 +318,9 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
break;
}
+ sbflags = cifs_sb_flags(cifs_sb);
fattr->cf_uid = cifs_sb->ctx->linux_uid;
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) {
+ if (!(sbflags & CIFS_MOUNT_OVERR_UID)) {
u64 id = le64_to_cpu(info->Uid);
if (id < ((uid_t)-1)) {
kuid_t uid = make_kuid(&init_user_ns, id);
@@ -328,7 +330,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
}
fattr->cf_gid = cifs_sb->ctx->linux_gid;
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) {
+ if (!(sbflags & CIFS_MOUNT_OVERR_GID)) {
u64 id = le64_to_cpu(info->Gid);
if (id < ((gid_t)-1)) {
kgid_t gid = make_kgid(&init_user_ns, id);
@@ -382,7 +384,7 @@ static int update_inode_info(struct super_block *sb,
*
* If file type or uniqueid is different, return error.
*/
- if (unlikely((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+ if (unlikely((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM) &&
CIFS_I(*inode)->uniqueid != fattr->cf_uniqueid)) {
CIFS_I(*inode)->time = 0; /* force reval */
return -ESTALE;
@@ -468,7 +470,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path,
cifs_fill_uniqueid(sb, fattr);
/* check for Minshall+French symlinks */
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MF_SYMLINKS) {
tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path);
cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc);
}
@@ -1081,7 +1083,7 @@ cifs_backup_query_path_info(int xid,
else if ((tcon->ses->capabilities &
tcon->ses->server->vals->cap_nt_find) == 0)
info.info_level = SMB_FIND_FILE_INFO_STANDARD;
- else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+ else if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM)
info.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
else /* no srvino useful for fallback to some netapp */
info.info_level = SMB_FIND_FILE_DIRECTORY_INFO;
@@ -1109,7 +1111,7 @@ static void cifs_set_fattr_ino(int xid, struct cifs_tcon *tcon, struct super_blo
struct TCP_Server_Info *server = tcon->ses->server;
int rc;
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM)) {
if (*inode)
fattr->cf_uniqueid = CIFS_I(*inode)->uniqueid;
else
@@ -1263,14 +1265,15 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
struct inode **inode,
const char *full_path)
{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifs_open_info_data tmp_data = {};
- struct cifs_tcon *tcon;
+ void *smb1_backup_rsp_buf = NULL;
struct TCP_Server_Info *server;
+ struct cifs_tcon *tcon;
struct tcon_link *tlink;
- struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
- void *smb1_backup_rsp_buf = NULL;
- int rc = 0;
+ unsigned int sbflags;
int tmprc = 0;
+ int rc = 0;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
@@ -1370,16 +1373,17 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
handle_mnt_opt:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
+ sbflags = cifs_sb_flags(cifs_sb);
/* query for SFU type info if supported and needed */
if ((fattr->cf_cifsattrs & ATTR_SYSTEM) &&
- (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
+ (sbflags & CIFS_MOUNT_UNX_EMUL)) {
tmprc = cifs_sfu_type(fattr, full_path, cifs_sb, xid);
if (tmprc)
cifs_dbg(FYI, "cifs_sfu_type failed: %d\n", tmprc);
}
/* fill in 0777 bits from ACL */
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) {
+ if (sbflags & CIFS_MOUNT_MODE_FROM_SID) {
rc = cifs_acl_to_fattr(cifs_sb, fattr, *inode,
true, full_path, fid);
if (rc == -EREMOTE)
@@ -1389,7 +1393,7 @@ handle_mnt_opt:
__func__, rc);
goto out;
}
- } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+ } else if (sbflags & CIFS_MOUNT_CIFS_ACL) {
rc = cifs_acl_to_fattr(cifs_sb, fattr, *inode,
false, full_path, fid);
if (rc == -EREMOTE)
@@ -1399,7 +1403,7 @@ handle_mnt_opt:
__func__, rc);
goto out;
}
- } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
+ } else if (sbflags & CIFS_MOUNT_UNX_EMUL)
/* fill in remaining high mode bits e.g. SUID, VTX */
cifs_sfu_mode(fattr, full_path, cifs_sb, xid);
else if (!(tcon->posix_extensions))
@@ -1409,7 +1413,7 @@ handle_mnt_opt:
/* check for Minshall+French symlinks */
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
+ if (sbflags & CIFS_MOUNT_MF_SYMLINKS) {
tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path);
cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc);
}
@@ -1509,7 +1513,7 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data,
* 3. Tweak fattr based on mount options
*/
/* check for Minshall+French symlinks */
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MF_SYMLINKS) {
tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path);
cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc);
}
@@ -1660,7 +1664,7 @@ struct inode *cifs_root_iget(struct super_block *sb)
int len;
int rc;
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+ if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH)
&& cifs_sb->prepath) {
len = strlen(cifs_sb->prepath);
path = kzalloc(len + 2 /* leading sep + null */, GFP_KERNEL);
@@ -2098,8 +2102,9 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
const char *full_path, struct cifs_sb_info *cifs_sb,
struct cifs_tcon *tcon, const unsigned int xid)
{
- int rc = 0;
struct inode *inode = NULL;
+ unsigned int sbflags;
+ int rc = 0;
if (tcon->posix_extensions) {
rc = smb311_posix_get_inode_info(&inode, full_path,
@@ -2139,6 +2144,7 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
if (parent->i_mode & S_ISGID)
mode |= S_ISGID;
+ sbflags = cifs_sb_flags(cifs_sb);
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (tcon->unix_ext) {
struct cifs_unix_set_info_args args = {
@@ -2148,7 +2154,7 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
.mtime = NO_CHANGE_64,
.device = 0,
};
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+ if (sbflags & CIFS_MOUNT_SET_UID) {
args.uid = current_fsuid();
if (parent->i_mode & S_ISGID)
args.gid = parent->i_gid;
@@ -2166,14 +2172,14 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
{
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
struct TCP_Server_Info *server = tcon->ses->server;
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
+ if (!(sbflags & CIFS_MOUNT_CIFS_ACL) &&
(mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo)
server->ops->mkdir_setinfo(inode, full_path, cifs_sb,
tcon, xid);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
+ if (sbflags & CIFS_MOUNT_DYNPERM)
inode->i_mode = (mode | S_IFDIR);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+ if (sbflags & CIFS_MOUNT_SET_UID) {
inode->i_uid = current_fsuid();
if (inode->i_mode & S_ISGID)
inode->i_gid = parent->i_gid;
@@ -2686,7 +2692,7 @@ cifs_dentry_needs_reval(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct cached_fid *cfid = NULL;
@@ -2727,7 +2733,7 @@ cifs_dentry_needs_reval(struct dentry *dentry)
}
/* hardlinked files w/ noserverino get "special" treatment */
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM) &&
S_ISREG(inode->i_mode) && inode->i_nlink != 1)
return true;
@@ -2752,10 +2758,10 @@ cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
int
cifs_revalidate_mapping(struct inode *inode)
{
- int rc;
struct cifsInodeInfo *cifs_inode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
unsigned long *flags = &cifs_inode->flags;
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ int rc;
/* swapfiles are not supposed to be shared */
if (IS_SWAPFILE(inode))
@@ -2768,7 +2774,7 @@ cifs_revalidate_mapping(struct inode *inode)
if (test_and_clear_bit(CIFS_INO_INVALID_MAPPING, flags)) {
/* for cache=singleclient, do not invalidate */
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RW_CACHE)
goto skip_invalidate;
cifs_inode->netfs.zero_point = cifs_inode->netfs.remote_i_size;
@@ -2892,10 +2898,11 @@ int cifs_revalidate_dentry(struct dentry *dentry)
int cifs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
- struct dentry *dentry = path->dentry;
- struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(path->dentry);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_inode(dentry);
+ unsigned int sbflags;
int rc;
if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
@@ -2952,12 +2959,13 @@ int cifs_getattr(struct mnt_idmap *idmap, const struct path *path,
* enabled, and the admin hasn't overridden them, set the ownership
* to the fsuid/fsgid of the current process.
*/
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
- !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
+ sbflags = cifs_sb_flags(cifs_sb);
+ if ((sbflags & CIFS_MOUNT_MULTIUSER) &&
+ !(sbflags & CIFS_MOUNT_CIFS_ACL) &&
!tcon->unix_ext) {
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
+ if (!(sbflags & CIFS_MOUNT_OVERR_UID))
stat->uid = current_fsuid();
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
+ if (!(sbflags & CIFS_MOUNT_OVERR_GID))
stat->gid = current_fsgid();
}
return 0;
@@ -3102,7 +3110,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
void *page = alloc_dentry_path();
struct inode *inode = d_inode(direntry);
struct cifsInodeInfo *cifsInode = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
struct cifs_unix_set_info_args *args = NULL;
@@ -3113,7 +3121,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
xid = get_xid();
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_PERM)
attrs->ia_valid |= ATTR_FORCE;
rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs);
@@ -3266,26 +3274,26 @@ out:
static int
cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
{
- unsigned int xid;
- kuid_t uid = INVALID_UID;
- kgid_t gid = INVALID_GID;
struct inode *inode = d_inode(direntry);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
struct cifsFileInfo *cfile = NULL;
- const char *full_path;
void *page = alloc_dentry_path();
- int rc = -EACCES;
- __u32 dosattr = 0;
__u64 mode = NO_CHANGE_64;
- bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions;
+ kuid_t uid = INVALID_UID;
+ kgid_t gid = INVALID_GID;
+ const char *full_path;
+ __u32 dosattr = 0;
+ int rc = -EACCES;
+ unsigned int xid;
xid = get_xid();
cifs_dbg(FYI, "setattr on file %pd attrs->ia_valid 0x%x\n",
direntry, attrs->ia_valid);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+ if (sbflags & CIFS_MOUNT_NO_PERM)
attrs->ia_valid |= ATTR_FORCE;
rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs);
@@ -3346,8 +3354,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
if (attrs->ia_valid & ATTR_GID)
gid = attrs->ia_gid;
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) ||
- (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) {
+ if (sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID)) {
if (uid_valid(uid) || gid_valid(gid)) {
mode = NO_CHANGE_64;
rc = id_mode_to_cifs_acl(inode, full_path, &mode,
@@ -3358,9 +3365,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
goto cifs_setattr_exit;
}
}
- } else
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
+ } else if (!(sbflags & CIFS_MOUNT_SET_UID)) {
attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
+ }
/* skip mode change if it's just for clearing setuid/setgid */
if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
@@ -3369,9 +3376,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
if (attrs->ia_valid & ATTR_MODE) {
mode = attrs->ia_mode;
rc = 0;
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) ||
- (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) ||
- posix) {
+ if ((sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID)) ||
+ cifs_sb_master_tcon(cifs_sb)->posix_extensions) {
rc = id_mode_to_cifs_acl(inode, full_path, &mode,
INVALID_UID, INVALID_GID);
if (rc) {
@@ -3393,7 +3399,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
dosattr = cifsInode->cifsAttrs | ATTR_READONLY;
/* fix up mode if we're not using dynperm */
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
+ if ((sbflags & CIFS_MOUNT_DYNPERM) == 0)
attrs->ia_mode = inode->i_mode & ~S_IWUGO;
} else if ((mode & S_IWUGO) &&
(cifsInode->cifsAttrs & ATTR_READONLY)) {
@@ -3404,7 +3410,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
dosattr |= ATTR_NORMAL;
/* reset local inode permissions to normal */
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
+ if (!(sbflags & CIFS_MOUNT_DYNPERM)) {
attrs->ia_mode &= ~(S_IALLUGO);
if (S_ISDIR(inode->i_mode))
attrs->ia_mode |=
@@ -3413,7 +3419,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
attrs->ia_mode |=
cifs_sb->ctx->file_mode;
}
- } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
+ } else if (!(sbflags & CIFS_MOUNT_DYNPERM)) {
/* ignore mode change - ATTR_READONLY hasn't changed */
attrs->ia_valid &= ~ATTR_MODE;
}
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index 8dc2651e237f..9afab3237e54 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -216,7 +216,7 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg)
*/
case CIFS_GOING_FLAGS_LOGFLUSH:
case CIFS_GOING_FLAGS_NOLOGFLUSH:
- sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN;
+ atomic_or(CIFS_MOUNT_SHUTDOWN, &sbi->mnt_cifs_flags);
goto shutdown_good;
default:
rc = -EINVAL;
diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c
index a2f7bfa8ad1e..434e8fe74080 100644
--- a/fs/smb/client/link.c
+++ b/fs/smb/client/link.c
@@ -544,14 +544,15 @@ int
cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
struct dentry *direntry, const char *symname)
{
- int rc = -EOPNOTSUPP;
- unsigned int xid;
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
+ struct inode *newinode = NULL;
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
const char *full_path;
+ int rc = -EOPNOTSUPP;
+ unsigned int sbflags;
+ unsigned int xid;
void *page;
- struct inode *newinode = NULL;
if (unlikely(cifs_forced_shutdown(cifs_sb)))
return smb_EIO(smb_eio_trace_forced_shutdown);
@@ -580,6 +581,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
cifs_dbg(FYI, "symname is %s\n", symname);
/* BB what if DFS and this volume is on different share? BB */
+ sbflags = cifs_sb_flags(cifs_sb);
rc = -EOPNOTSUPP;
switch (cifs_symlink_type(cifs_sb)) {
case CIFS_SYMLINK_TYPE_UNIX:
@@ -594,14 +596,14 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
break;
case CIFS_SYMLINK_TYPE_MFSYMLINKS:
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
+ if (sbflags & CIFS_MOUNT_MF_SYMLINKS) {
rc = create_mf_symlink(xid, pTcon, cifs_sb,
full_path, symname);
}
break;
case CIFS_SYMLINK_TYPE_SFU:
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+ if (sbflags & CIFS_MOUNT_UNX_EMUL) {
rc = __cifs_sfu_make_node(xid, inode, direntry, pTcon,
full_path, S_IFLNK,
0, symname);
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 22cde46309fe..bc24c92b8b95 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -275,13 +275,15 @@ dump_smb(void *buf, int smb_buf_length)
void
cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
{
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
+
+ if (sbflags & CIFS_MOUNT_SERVER_INUM) {
struct cifs_tcon *tcon = NULL;
if (cifs_sb->master_tlink)
tcon = cifs_sb_master_tcon(cifs_sb);
- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
+ atomic_andnot(CIFS_MOUNT_SERVER_INUM, &cifs_sb->mnt_cifs_flags);
cifs_sb->mnt_cifs_serverino_autodisabled = true;
cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s\n",
tcon ? tcon->tree_name : "new server");
@@ -382,11 +384,13 @@ void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
bool
backup_cred(struct cifs_sb_info *cifs_sb)
{
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) {
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
+
+ if (sbflags & CIFS_MOUNT_CIFS_BACKUPUID) {
if (uid_eq(cifs_sb->ctx->backupuid, current_fsuid()))
return true;
}
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) {
+ if (sbflags & CIFS_MOUNT_CIFS_BACKUPGID) {
if (in_group_p(cifs_sb->ctx->backupgid))
return true;
}
@@ -955,7 +959,7 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb));
}
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags);
return 0;
}
@@ -984,7 +988,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid,
* look up or tcon is not DFS.
*/
if (strlen(full_path) < 2 || !cifs_sb ||
- (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
+ (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_DFS) ||
!is_tcon_dfs(tcon))
return 0;
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index 8615a8747b7f..be22bbc4a65a 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -121,7 +121,7 @@ retry:
* want to clobber the existing one with the one that
* the readdir code created.
*/
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM))
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM))
fattr->cf_uniqueid = CIFS_I(inode)->uniqueid;
/*
@@ -177,6 +177,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
struct cifs_open_info_data data = {
.reparse = { .tag = fattr->cf_cifstag, },
};
+ unsigned int sbflags;
fattr->cf_uid = cifs_sb->ctx->linux_uid;
fattr->cf_gid = cifs_sb->ctx->linux_gid;
@@ -215,12 +216,12 @@ out_reparse:
* may look wrong since the inodes may not have timed out by the time
* "ls" does a stat() call on them.
*/
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) ||
- (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID))
+ sbflags = cifs_sb_flags(cifs_sb);
+ if (sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID))
fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL &&
- fattr->cf_cifsattrs & ATTR_SYSTEM) {
+ if ((sbflags & CIFS_MOUNT_UNX_EMUL) &&
+ (fattr->cf_cifsattrs & ATTR_SYSTEM)) {
if (fattr->cf_eof == 0) {
fattr->cf_mode &= ~S_IFMT;
fattr->cf_mode |= S_IFIFO;
@@ -345,13 +346,14 @@ static int
_initiate_cifs_search(const unsigned int xid, struct file *file,
const char *full_path)
{
- __u16 search_flags;
- int rc = 0;
- struct cifsFileInfo *cifsFile;
- struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(file);
struct tcon_link *tlink = NULL;
- struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
+ struct cifsFileInfo *cifsFile;
+ struct cifs_tcon *tcon;
+ unsigned int sbflags;
+ __u16 search_flags;
+ int rc = 0;
if (file->private_data == NULL) {
tlink = cifs_sb_tlink(cifs_sb);
@@ -385,6 +387,7 @@ _initiate_cifs_search(const unsigned int xid, struct file *file,
cifs_dbg(FYI, "Full path: %s start at: %lld\n", full_path, file->f_pos);
ffirst_retry:
+ sbflags = cifs_sb_flags(cifs_sb);
/* test for Unix extensions */
/* but now check for them on the share/mount not on the SMB session */
/* if (cap_unix(tcon->ses) { */
@@ -395,7 +398,7 @@ ffirst_retry:
else if ((tcon->ses->capabilities &
tcon->ses->server->vals->cap_nt_find) == 0) {
cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD;
- } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
+ } else if (sbflags & CIFS_MOUNT_SERVER_INUM) {
cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
} else /* not srvinos - BB fixme add check for backlevel? */ {
cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO;
@@ -411,8 +414,7 @@ ffirst_retry:
if (rc == 0) {
cifsFile->invalidHandle = false;
- } else if ((rc == -EOPNOTSUPP) &&
- (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
+ } else if (rc == -EOPNOTSUPP && (sbflags & CIFS_MOUNT_SERVER_INUM)) {
cifs_autodisable_serverino(cifs_sb);
goto ffirst_retry;
}
@@ -690,7 +692,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
loff_t first_entry_in_buffer;
loff_t index_to_find = pos;
struct cifsFileInfo *cfile = file->private_data;
- struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(file);
struct TCP_Server_Info *server = tcon->ses->server;
/* check if index in the buffer */
@@ -955,6 +957,7 @@ static int cifs_filldir(char *find_entry, struct file *file,
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifs_dirent de = { NULL, };
struct cifs_fattr fattr;
+ unsigned int sbflags;
struct qstr name;
int rc = 0;
@@ -1019,15 +1022,15 @@ static int cifs_filldir(char *find_entry, struct file *file,
break;
}
- if (de.ino && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
+ sbflags = cifs_sb_flags(cifs_sb);
+ if (de.ino && (sbflags & CIFS_MOUNT_SERVER_INUM)) {
fattr.cf_uniqueid = de.ino;
} else {
fattr.cf_uniqueid = iunique(sb, ROOT_I);
cifs_autodisable_serverino(cifs_sb);
}
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) &&
- couldbe_mf_symlink(&fattr))
+ if ((sbflags & CIFS_MOUNT_MF_SYMLINKS) && couldbe_mf_symlink(&fattr))
/*
* trying to get the type and mode can be slow,
* so just call those regular files for now, and mark
@@ -1058,7 +1061,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
const char *full_path;
void *page = alloc_dentry_path();
struct cached_fid *cfid = NULL;
- struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(file);
xid = get_xid();
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index ce9b923498b5..cd1e1eaee67a 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -55,17 +55,18 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
const char *full_path, const char *symname)
{
struct reparse_symlink_data_buffer *buf = NULL;
- struct cifs_open_info_data data = {};
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
const char *symroot = cifs_sb->ctx->symlinkroot;
+ struct cifs_open_info_data data = {};
+ char sep = CIFS_DIR_SEP(cifs_sb);
+ char *symlink_target = NULL;
+ u16 len, plen, poff, slen;
+ unsigned int sbflags;
+ __le16 *path = NULL;
struct inode *new;
+ char *sym = NULL;
struct kvec iov;
- __le16 *path = NULL;
bool directory;
- char *symlink_target = NULL;
- char *sym = NULL;
- char sep = CIFS_DIR_SEP(cifs_sb);
- u16 len, plen, poff, slen;
int rc = 0;
if (strlen(symname) > REPARSE_SYM_PATH_MAX)
@@ -83,8 +84,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
.symlink_target = symlink_target,
};
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) &&
- symroot && symname[0] == '/') {
+ sbflags = cifs_sb_flags(cifs_sb);
+ if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symroot && symname[0] == '/') {
/*
* This is a request to create an absolute symlink on the server
* which does not support POSIX paths, and expects symlink in
@@ -164,7 +165,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
* mask these characters in NT object prefix by '_' and then change
* them back.
*/
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/')
+ if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/')
sym[0] = sym[1] = sym[2] = sym[5] = '_';
path = cifs_convert_path_to_utf16(sym, cifs_sb);
@@ -173,7 +174,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
goto out;
}
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
+ if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
sym[0] = '\\';
sym[1] = sym[2] = '?';
sym[5] = ':';
@@ -197,7 +198,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
slen = 2 * UniStrnlen((wchar_t *)path, REPARSE_SYM_PATH_MAX);
poff = 0;
plen = slen;
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
+ if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
/*
* For absolute NT symlinks skip leading "\\??\\" in PrintName as
* PrintName is user visible location in DOS/Win32 format (not in NT format).
@@ -824,7 +825,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
goto out;
}
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) &&
+ if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS) &&
symroot && !relative) {
/*
* This is an absolute symlink from the server which does not
diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h
index 570b0d25aeba..0164dc47bdfd 100644
--- a/fs/smb/client/reparse.h
+++ b/fs/smb/client/reparse.h
@@ -33,7 +33,7 @@ static inline kuid_t wsl_make_kuid(struct cifs_sb_info *cifs_sb,
{
u32 uid = le32_to_cpu(*(__le32 *)ptr);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_OVERR_UID)
return cifs_sb->ctx->linux_uid;
return make_kuid(current_user_ns(), uid);
}
@@ -43,7 +43,7 @@ static inline kgid_t wsl_make_kgid(struct cifs_sb_info *cifs_sb,
{
u32 gid = le32_to_cpu(*(__le32 *)ptr);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_OVERR_GID)
return cifs_sb->ctx->linux_gid;
return make_kgid(current_user_ns(), gid);
}
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index aed49aaef8c4..9643eca0cb70 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -49,6 +49,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
if (!CIFSSMBQFSUnixInfo(xid, tcon)) {
__u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
+ unsigned int sbflags;
cifs_dbg(FYI, "unix caps which server supports %lld\n", cap);
/*
@@ -75,14 +76,16 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)
cifs_dbg(VFS, "per-share encryption not supported yet\n");
+ if (cifs_sb)
+ sbflags = cifs_sb_flags(cifs_sb);
+
cap &= CIFS_UNIX_CAP_MASK;
if (ctx && ctx->no_psx_acl)
cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
else if (CIFS_UNIX_POSIX_ACL_CAP & cap) {
cifs_dbg(FYI, "negotiated posix acl support\n");
if (cifs_sb)
- cifs_sb->mnt_cifs_flags |=
- CIFS_MOUNT_POSIXACL;
+ sbflags |= CIFS_MOUNT_POSIXACL;
}
if (ctx && ctx->posix_paths == 0)
@@ -90,10 +93,12 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
cifs_dbg(FYI, "negotiate posix pathnames\n");
if (cifs_sb)
- cifs_sb->mnt_cifs_flags |=
- CIFS_MOUNT_POSIX_PATHS;
+ sbflags |= CIFS_MOUNT_POSIX_PATHS;
}
+ if (cifs_sb)
+ atomic_set(&cifs_sb->mnt_cifs_flags, sbflags);
+
cifs_dbg(FYI, "Negotiate caps 0x%x\n", (int)cap);
#ifdef CONFIG_CIFS_DEBUG2
if (cap & CIFS_UNIX_FCNTL_CAP)
@@ -1147,7 +1152,7 @@ static int cifs_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
__u64 volatile_fid, __u16 net_fid,
struct cifsInodeInfo *cinode, unsigned int oplock)
{
- unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags;
+ unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode));
__u8 op;
op = !!((oplock & CIFS_CACHE_READ_FLG) || (sbflags & CIFS_MOUNT_RO_CACHE));
@@ -1282,7 +1287,8 @@ cifs_make_node(unsigned int xid, struct inode *inode,
struct dentry *dentry, struct cifs_tcon *tcon,
const char *full_path, umode_t mode, dev_t dev)
{
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode);
+ unsigned int sbflags = cifs_sb_flags(cifs_sb);
struct inode *newinode = NULL;
int rc;
@@ -1298,7 +1304,7 @@ cifs_make_node(unsigned int xid, struct inode *inode,
.mtime = NO_CHANGE_64,
.device = dev,
};
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+ if (sbflags & CIFS_MOUNT_SET_UID) {
args.uid = current_fsuid();
args.gid = current_fsgid();
} else {
@@ -1317,7 +1323,7 @@ cifs_make_node(unsigned int xid, struct inode *inode,
if (rc == 0)
d_instantiate(dentry, newinode);
return rc;
- } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+ } else if (sbflags & CIFS_MOUNT_UNX_EMUL) {
/*
* Check if mounted with mount parm 'sfu' mount parm.
* SFU emulation should work with all servers
diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c
index 1ab41de2b634..ed651c946251 100644
--- a/fs/smb/client/smb2file.c
+++ b/fs/smb/client/smb2file.c
@@ -72,7 +72,7 @@ int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_i
* POSIX server does not distinguish between symlinks to file and
* symlink directory. So nothing is needed to fix on the client side.
*/
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS)
return 0;
if (!*target)
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index e19674d9b92b..973fce3c959c 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -455,17 +455,8 @@ calc_size_exit:
__le16 *
cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
{
- int len;
const char *start_of_path;
- __le16 *to;
- int map_type;
-
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
- map_type = SFM_MAP_UNI_RSVD;
- else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
- map_type = SFU_MAP_UNI_RSVD;
- else
- map_type = NO_MAP_UNI_RSVD;
+ int len;
/* Windows doesn't allow paths beginning with \ */
if (from[0] == '\\')
@@ -479,14 +470,13 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
} else
start_of_path = from;
- to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len,
- cifs_sb->local_nls, map_type);
- return to;
+ return cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len,
+ cifs_sb->local_nls, cifs_remap(cifs_sb));
}
__le32 smb2_get_lease_state(struct cifsInodeInfo *cinode, unsigned int oplock)
{
- unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags;
+ unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode));
__le32 lease = 0;
if ((oplock & CIFS_CACHE_WRITE_FLG) || (sbflags & CIFS_MOUNT_RW_CACHE))
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index fea9a35caa57..7f2d3459cbf9 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -986,7 +986,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
rc = -EREMOTE;
}
if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) &&
- (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS))
+ (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_DFS))
rc = -EOPNOTSUPP;
goto out;
}
@@ -2691,7 +2691,7 @@ static int smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
__u64 volatile_fid, __u16 net_fid,
struct cifsInodeInfo *cinode, unsigned int oplock)
{
- unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags;
+ unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode));
__u8 op;
if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
@@ -5332,7 +5332,7 @@ static int smb2_make_node(unsigned int xid, struct inode *inode,
struct dentry *dentry, struct cifs_tcon *tcon,
const char *full_path, umode_t mode, dev_t dev)
{
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode));
int rc = -EOPNOTSUPP;
/*
@@ -5341,7 +5341,7 @@ static int smb2_make_node(unsigned int xid, struct inode *inode,
* supports block and char device, socket & fifo,
* and was used by default in earlier versions of Windows
*/
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+ if (sbflags & CIFS_MOUNT_UNX_EMUL) {
rc = cifs_sfu_make_node(xid, inode, dentry, tcon,
full_path, mode, dev);
} else if (CIFS_REPARSE_SUPPORT(tcon)) {
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index ef655acf673d..04e361ed2356 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -1714,19 +1714,17 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
is_binding = (ses->ses_status == SES_GOOD);
spin_unlock(&ses->ses_lock);
- /* keep session key if binding */
- if (!is_binding) {
- kfree_sensitive(ses->auth_key.response);
- ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
- GFP_KERNEL);
- if (!ses->auth_key.response) {
- cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory\n",
- msg->sesskey_len);
- rc = -ENOMEM;
- goto out_put_spnego_key;
- }
- ses->auth_key.len = msg->sesskey_len;
+ kfree_sensitive(ses->auth_key.response);
+ ses->auth_key.response = kmemdup(msg->data,
+ msg->sesskey_len,
+ GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cifs_dbg(VFS, "%s: can't allocate (%u bytes) memory\n",
+ __func__, msg->sesskey_len);
+ rc = -ENOMEM;
+ goto out_put_spnego_key;
}
+ ses->auth_key.len = msg->sesskey_len;
sess_data->iov[1].iov_base = msg->data + msg->sesskey_len;
sess_data->iov[1].iov_len = msg->secblob_len;
@@ -3182,22 +3180,19 @@ SMB2_open_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
}
if ((oparms->disposition != FILE_OPEN) && (oparms->cifs_sb)) {
+ unsigned int sbflags = cifs_sb_flags(oparms->cifs_sb);
bool set_mode;
bool set_owner;
- if ((oparms->cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) &&
- (oparms->mode != ACL_NO_MODE))
+ if ((sbflags & CIFS_MOUNT_MODE_FROM_SID) &&
+ oparms->mode != ACL_NO_MODE) {
set_mode = true;
- else {
+ } else {
set_mode = false;
oparms->mode = ACL_NO_MODE;
}
- if (oparms->cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
- set_owner = true;
- else
- set_owner = false;
-
+ set_owner = sbflags & CIFS_MOUNT_UID_FROM_ACL;
if (set_owner | set_mode) {
cifs_dbg(FYI, "add sd with mode 0x%x\n", oparms->mode);
rc = add_sd_context(iov, &n_iov, oparms->mode, set_owner);
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 75697f6d2566..05f8099047e1 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -807,16 +807,21 @@ cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
}
/*
- * Return a channel (master if none) of @ses that can be used to send
- * regular requests.
+ * cifs_pick_channel - pick an eligible channel for network operations
*
- * If we are currently binding a new channel (negprot/sess.setup),
- * return the new incomplete channel.
+ * @ses: session reference
+ *
+ * Select an eligible channel (not terminating and not marked as needing
+ * reconnect), preferring the least loaded one. If no eligible channel is
+ * found, fall back to the primary channel (index 0).
+ *
+ * Return: TCP_Server_Info pointer for the chosen channel, or NULL if @ses is
+ * NULL.
*/
struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
{
uint index = 0;
- unsigned int min_in_flight = UINT_MAX, max_in_flight = 0;
+ unsigned int min_in_flight = UINT_MAX;
struct TCP_Server_Info *server = NULL;
int i, start, cur;
@@ -846,14 +851,8 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
min_in_flight = server->in_flight;
index = cur;
}
- if (server->in_flight > max_in_flight)
- max_in_flight = server->in_flight;
}
- /* if all channels are equally loaded, fall back to round-robin */
- if (min_in_flight == max_in_flight)
- index = (uint)start % ses->chan_count;
-
server = ses->chans[index].server;
spin_unlock(&ses->chan_lock);
diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c
index e1a7d9a10a53..23227f2f9428 100644
--- a/fs/smb/client/xattr.c
+++ b/fs/smb/client/xattr.c
@@ -149,7 +149,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
break;
}
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR)
goto out;
if (pTcon->ses->server->ops->set_EA) {
@@ -309,7 +309,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
break;
}
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR)
goto out;
if (pTcon->ses->server->ops->query_all_EAs)
@@ -398,7 +398,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
if (unlikely(cifs_forced_shutdown(cifs_sb)))
return smb_EIO(smb_eio_trace_forced_shutdown);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
+ if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR)
return -EOPNOTSUPP;
tlink = cifs_sb_tlink(cifs_sb);
diff --git a/fs/smb/server/Kconfig b/fs/smb/server/Kconfig
index 2775162c535c..12594879cb64 100644
--- a/fs/smb/server/Kconfig
+++ b/fs/smb/server/Kconfig
@@ -13,6 +13,7 @@ config SMB_SERVER
select CRYPTO_LIB_MD5
select CRYPTO_LIB_SHA256
select CRYPTO_LIB_SHA512
+ select CRYPTO_LIB_UTILS
select CRYPTO_CMAC
select CRYPTO_AEAD2
select CRYPTO_CCM
diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c
index 580c4d303dc3..5fe8c667c6b1 100644
--- a/fs/smb/server/auth.c
+++ b/fs/smb/server/auth.c
@@ -15,6 +15,7 @@
#include <crypto/aead.h>
#include <crypto/md5.h>
#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <linux/random.h>
#include <linux/scatterlist.h>
@@ -165,7 +166,8 @@ int ksmbd_auth_ntlmv2(struct ksmbd_conn *conn, struct ksmbd_session *sess,
ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE,
sess->sess_key);
- if (memcmp(ntlmv2->ntlmv2_hash, ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE) != 0)
+ if (crypto_memneq(ntlmv2->ntlmv2_hash, ntlmv2_rsp,
+ CIFS_HMAC_MD5_HASH_SIZE))
return -EINVAL;
return 0;
}
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 95901a78951c..743c629fe7ec 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -4,6 +4,7 @@
* Copyright (C) 2018 Samsung Electronics Co., Ltd.
*/
+#include <crypto/utils.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>
#include <linux/syscalls.h>
@@ -8880,7 +8881,7 @@ int smb2_check_sign_req(struct ksmbd_work *work)
ksmbd_sign_smb2_pdu(work->conn, work->sess->sess_key, iov, 1,
signature);
- if (memcmp(signature, signature_req, SMB2_SIGNATURE_SIZE)) {
+ if (crypto_memneq(signature, signature_req, SMB2_SIGNATURE_SIZE)) {
pr_err("bad smb2 signature\n");
return 0;
}
@@ -8968,7 +8969,7 @@ int smb3_check_sign_req(struct ksmbd_work *work)
if (ksmbd_sign_smb3_pdu(conn, signing_key, iov, 1, signature))
return 0;
- if (memcmp(signature, signature_req, SMB2_SIGNATURE_SIZE)) {
+ if (crypto_memneq(signature, signature_req, SMB2_SIGNATURE_SIZE)) {
pr_err("bad smb2 signature\n");
return 0;
}
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 7c53b78b818e..188572491d53 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -2540,9 +2540,9 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
goto put;
req = (struct smbdirect_negotiate_req *)recvmsg->packet;
- sp->max_recv_size = min_t(int, sp->max_recv_size,
+ sp->max_recv_size = min_t(u32, sp->max_recv_size,
le32_to_cpu(req->preferred_send_size));
- sp->max_send_size = min_t(int, sp->max_send_size,
+ sp->max_send_size = min_t(u32, sp->max_send_size,
le32_to_cpu(req->max_receive_size));
sp->max_fragmented_send_size =
le32_to_cpu(req->max_fragmented_size);
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 8e958db5f786..67abd4dff222 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -344,6 +344,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer,
if (unlikely(length < 0))
return -EIO;
+ if (unlikely(*offset < 0 || *offset >= SQUASHFS_METADATA_SIZE))
+ return -EIO;
+
while (length) {
entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
if (entry->error) {
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 9c6765cc2d44..bd8fbb40b49e 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -872,6 +872,34 @@ resv_err:
return err2;
}
+void
+xfs_growfs_compute_deltas(
+ struct xfs_mount *mp,
+ xfs_rfsblock_t nb,
+ int64_t *deltap,
+ xfs_agnumber_t *nagcountp)
+{
+ xfs_rfsblock_t nb_div, nb_mod;
+ int64_t delta;
+ xfs_agnumber_t nagcount;
+
+ nb_div = nb;
+ nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks);
+ if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS)
+ nb_div++;
+ else if (nb_mod)
+ nb = nb_div * mp->m_sb.sb_agblocks;
+
+ if (nb_div > XFS_MAX_AGNUMBER + 1) {
+ nb_div = XFS_MAX_AGNUMBER + 1;
+ nb = nb_div * mp->m_sb.sb_agblocks;
+ }
+ nagcount = nb_div;
+ delta = nb - mp->m_sb.sb_dblocks;
+ *deltap = delta;
+ *nagcountp = nagcount;
+}
+
/*
* Extent the AG indicated by the @id by the length passed in
*/
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 1f24cfa27321..3cd4790768ff 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -331,6 +331,9 @@ struct aghdr_init_data {
int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id);
int xfs_ag_shrink_space(struct xfs_perag *pag, struct xfs_trans **tpp,
xfs_extlen_t delta);
+void
+xfs_growfs_compute_deltas(struct xfs_mount *mp, xfs_rfsblock_t nb,
+ int64_t *deltap, xfs_agnumber_t *nagcountp);
int xfs_ag_extend_space(struct xfs_perag *pag, struct xfs_trans *tp,
xfs_extlen_t len);
int xfs_ag_get_geometry(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index a017016e9075..3794e5412eba 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -268,6 +268,10 @@ xfs_inode_from_disk(
}
if (xfs_is_reflink_inode(ip))
xfs_ifork_init_cow(ip);
+ if (xfs_is_metadir_inode(ip)) {
+ XFS_STATS_DEC(ip->i_mount, xs_inodes_active);
+ XFS_STATS_INC(ip->i_mount, xs_inodes_meta);
+ }
return 0;
out_destroy_data_fork:
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c
index cf239f862212..71f004e9dc64 100644
--- a/fs/xfs/libxfs/xfs_metafile.c
+++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -61,6 +61,9 @@ xfs_metafile_set_iflag(
ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
ip->i_metatype = metafile_type;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_DEC(ip->i_mount, xs_inodes_active);
+ XFS_STATS_INC(ip->i_mount, xs_inodes_meta);
}
/* Clear the metadata directory inode flag. */
@@ -74,6 +77,8 @@ xfs_metafile_clear_iflag(
ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ XFS_STATS_INC(ip->i_mount, xs_inodes_active);
+ XFS_STATS_DEC(ip->i_mount, xs_inodes_meta);
}
/*
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 2e9715cc1641..23cde1248f01 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -73,7 +73,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr, 64);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf, 64);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr, 64);
- XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_hdr, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_map, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_local, 4);
@@ -116,7 +116,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_da_intnode, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_entry, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_hdr, 16);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16);
XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, freetag, 0);
XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, length, 2);
@@ -136,16 +136,7 @@ xfs_check_ondisk_structs(void)
/* ondisk dir/attr structures from xfs/122 */
XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_entry, 3);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_unused, 6);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10);
/* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
@@ -217,11 +208,6 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0);
XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0);
- XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192);
- XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24);
- XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64);
- XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64);
-
/*
* Make sure the incore inode timestamp range corresponds to hand
* converted values based on the ondisk format specification.
@@ -301,6 +287,40 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_SB_OFFSET(sb_pad, 281);
XFS_CHECK_SB_OFFSET(sb_rtstart, 288);
XFS_CHECK_SB_OFFSET(sb_rtreserved, 296);
+
+ /*
+ * ioctl UABI
+ *
+ * Due to different padding/alignment requirements across
+ * different architectures, some structures are ommited from
+ * the size checks. In addition, structures with architecture
+ * dependent size fields are also ommited (e.g. __kernel_long_t).
+ */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64);
+ XFS_CHECK_STRUCT_SIZE(struct dioattr, 12);
+ XFS_CHECK_STRUCT_SIZE(struct getbmap, 32);
+ XFS_CHECK_STRUCT_SIZE(struct getbmapx, 48);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist_cursor, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist_ent, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_ag_geometry, 128);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rtgroup_geometry, 128);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_error_injection, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_geom, 256);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_geom_v4, 112);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_counts, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_resblks, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_growfs_log, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bulk_ireq, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_fs_eofblocks, 128);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_fsid, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_metadata, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_vec, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_vec_head, 40);
}
#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 38d16fe1f6d8..47322adb7690 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1347,6 +1347,9 @@ xfs_log_sb(
* feature was introduced. This counter can go negative due to the way
* we handle nearly-lockless reservations, so we must use the _positive
* variant here to avoid writing out nonsense frextents.
+ *
+ * RT groups are only supported on v5 file systems, which always
+ * have lazy SB counters.
*/
if (xfs_has_rtgroups(mp) && !xfs_has_zoned(mp)) {
mp->m_sb.sb_frextents =
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 9dc55c918c78..23b80c54aa60 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -177,7 +177,7 @@ xrep_dir_teardown(
rd->dir_names = NULL;
if (rd->dir_entries)
xfarray_destroy(rd->dir_entries);
- rd->dir_names = NULL;
+ rd->dir_entries = NULL;
}
/* Set up for a directory repair. */
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index 52a108f6d5f4..33c6db6b4498 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -442,6 +442,11 @@ xrep_adoption_check_dcache(
return 0;
d_child = try_lookup_noperm(&qname, d_orphanage);
+ if (IS_ERR(d_child)) {
+ dput(d_orphanage);
+ return PTR_ERR(d_child);
+ }
+
if (d_child) {
trace_xrep_adoption_check_child(sc->mp, d_child);
@@ -479,7 +484,7 @@ xrep_adoption_zap_dcache(
return;
d_child = try_lookup_noperm(&qname, d_orphanage);
- while (d_child != NULL) {
+ while (!IS_ERR_OR_NULL(d_child)) {
trace_xrep_adoption_invalidate_child(sc->mp, d_child);
ASSERT(d_is_negative(d_child));
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 17255c41786b..8d64d904d73c 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -95,18 +95,17 @@ xfs_growfs_data_private(
struct xfs_growfs_data *in) /* growfs data input struct */
{
xfs_agnumber_t oagcount = mp->m_sb.sb_agcount;
+ xfs_rfsblock_t nb = in->newblocks;
struct xfs_buf *bp;
int error;
xfs_agnumber_t nagcount;
xfs_agnumber_t nagimax = 0;
- xfs_rfsblock_t nb, nb_div, nb_mod;
int64_t delta;
bool lastag_extended = false;
struct xfs_trans *tp;
struct aghdr_init_data id = {};
struct xfs_perag *last_pag;
- nb = in->newblocks;
error = xfs_sb_validate_fsb_count(&mp->m_sb, nb);
if (error)
return error;
@@ -125,20 +124,8 @@ xfs_growfs_data_private(
mp->m_sb.sb_rextsize);
if (error)
return error;
+ xfs_growfs_compute_deltas(mp, nb, &delta, &nagcount);
- nb_div = nb;
- nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks);
- if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS)
- nb_div++;
- else if (nb_mod)
- nb = nb_div * mp->m_sb.sb_agblocks;
-
- if (nb_div > XFS_MAX_AGNUMBER + 1) {
- nb_div = XFS_MAX_AGNUMBER + 1;
- nb = nb_div * mp->m_sb.sb_agblocks;
- }
- nagcount = nb_div;
- delta = nb - mp->m_sb.sb_dblocks;
/*
* Reject filesystems with a single AG because they are not
* supported, and reject a shrink operation that would cause a
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index 169123772cb3..239b843e83d4 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -314,6 +314,22 @@ xfs_rgno_mark_sick(
xfs_rtgroup_put(rtg);
}
+static inline void xfs_inode_report_fserror(struct xfs_inode *ip)
+{
+ /*
+ * Do not report inodes being constructed or freed, or metadata inodes,
+ * to fsnotify.
+ */
+ if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIM) ||
+ xfs_is_internal_inode(ip)) {
+ fserror_report_metadata(ip->i_mount->m_super, -EFSCORRUPTED,
+ GFP_NOFS);
+ return;
+ }
+
+ fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS);
+}
+
/* Mark the unhealthy parts of an inode. */
void
xfs_inode_mark_sick(
@@ -339,7 +355,7 @@ xfs_inode_mark_sick(
inode_state_clear(VFS_I(ip), I_DONTCACHE);
spin_unlock(&VFS_I(ip)->i_lock);
- fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS);
+ xfs_inode_report_fserror(ip);
if (mask)
xfs_healthmon_report_inode(ip, XFS_HEALTHMON_SICK, old_mask,
mask);
@@ -371,7 +387,7 @@ xfs_inode_mark_corrupt(
inode_state_clear(VFS_I(ip), I_DONTCACHE);
spin_unlock(&VFS_I(ip)->i_lock);
- fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS);
+ xfs_inode_report_fserror(ip);
if (mask)
xfs_healthmon_report_inode(ip, XFS_HEALTHMON_CORRUPT, old_mask,
mask);
diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c
index e37c18cec372..4a06d6632f65 100644
--- a/fs/xfs/xfs_healthmon.c
+++ b/fs/xfs/xfs_healthmon.c
@@ -69,7 +69,7 @@ xfs_healthmon_get(
struct xfs_healthmon *hm;
rcu_read_lock();
- hm = mp->m_healthmon;
+ hm = rcu_dereference(mp->m_healthmon);
if (hm && !refcount_inc_not_zero(&hm->ref))
hm = NULL;
rcu_read_unlock();
@@ -110,13 +110,13 @@ xfs_healthmon_attach(
struct xfs_healthmon *hm)
{
spin_lock(&xfs_healthmon_lock);
- if (mp->m_healthmon != NULL) {
+ if (rcu_access_pointer(mp->m_healthmon) != NULL) {
spin_unlock(&xfs_healthmon_lock);
return -EEXIST;
}
refcount_inc(&hm->ref);
- mp->m_healthmon = hm;
+ rcu_assign_pointer(mp->m_healthmon, hm);
hm->mount_cookie = (uintptr_t)mp->m_super;
spin_unlock(&xfs_healthmon_lock);
@@ -128,13 +128,16 @@ STATIC void
xfs_healthmon_detach(
struct xfs_healthmon *hm)
{
+ struct xfs_mount *mp;
+
spin_lock(&xfs_healthmon_lock);
if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) {
spin_unlock(&xfs_healthmon_lock);
return;
}
- XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL;
+ mp = XFS_M((struct super_block *)hm->mount_cookie);
+ rcu_assign_pointer(mp->m_healthmon, NULL);
hm->mount_cookie = DETACHED_MOUNT_COOKIE;
spin_unlock(&xfs_healthmon_lock);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index dbaab4ae709f..a7a09e7eec81 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -106,7 +106,7 @@ xfs_inode_alloc(
mapping_set_folio_min_order(VFS_I(ip)->i_mapping,
M_IGEO(mp)->min_folio_order);
- XFS_STATS_INC(mp, vn_active);
+ XFS_STATS_INC(mp, xs_inodes_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(ip->i_ino == 0);
@@ -172,7 +172,10 @@ __xfs_inode_free(
/* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list));
- XFS_STATS_DEC(ip->i_mount, vn_active);
+ if (xfs_is_metadir_inode(ip))
+ XFS_STATS_DEC(ip->i_mount, xs_inodes_meta);
+ else
+ XFS_STATS_DEC(ip->i_mount, xs_inodes_active);
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}
@@ -636,6 +639,14 @@ xfs_iget_cache_miss(
if (!ip)
return -ENOMEM;
+ /*
+ * Set XFS_INEW as early as possible so that the health code won't pass
+ * the inode to the fserror code if the ondisk inode cannot be loaded.
+ * We're going to free the xfs_inode immediately if that happens, which
+ * would lead to UAF problems.
+ */
+ xfs_iflags_set(ip, XFS_INEW);
+
error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags);
if (error)
goto out_destroy;
@@ -713,7 +724,6 @@ xfs_iget_cache_miss(
ip->i_udquot = NULL;
ip->i_gdquot = NULL;
ip->i_pdquot = NULL;
- xfs_iflags_set(ip, XFS_INEW);
/* insert the new inode */
spin_lock(&pag->pag_ici_lock);
@@ -2234,7 +2244,7 @@ xfs_inode_mark_reclaimable(
struct xfs_mount *mp = ip->i_mount;
bool need_inactive;
- XFS_STATS_INC(mp, vn_reclaim);
+ XFS_STATS_INC(mp, xs_inode_mark_reclaimable);
/*
* We should never get here with any of the reclaim flags already set.
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 61c71128d171..ddd4028be8d6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -345,7 +345,7 @@ typedef struct xfs_mount {
struct xfs_hooks m_dir_update_hooks;
/* Private data referring to a health monitor object. */
- struct xfs_healthmon *m_healthmon;
+ struct xfs_healthmon __rcu *m_healthmon;
} xfs_mount_t;
#define M_IGEO(mp) (&(mp)->m_ino_geo)
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 6be19fa1ebe2..64c8afb935c2 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -304,7 +304,7 @@ xfs_dax_notify_dev_failure(
error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
if (error) {
- xfs_perag_put(pag);
+ xfs_perag_rele(pag);
break;
}
@@ -340,7 +340,7 @@ xfs_dax_notify_dev_failure(
if (rtg)
xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
if (error) {
- xfs_group_put(xg);
+ xfs_group_rele(xg);
break;
}
}
diff --git a/fs/xfs/xfs_platform.h b/fs/xfs/xfs_platform.h
index 1e59bf94d1f2..59a33c60e0ca 100644
--- a/fs/xfs/xfs_platform.h
+++ b/fs/xfs/xfs_platform.h
@@ -235,6 +235,10 @@ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
#ifdef XFS_WARN
+/*
+ * Please note that this ASSERT doesn't kill the kernel. It will if the kernel
+ * has panic_on_warn set.
+ */
#define ASSERT(expr) \
(likely(expr) ? (void)0 : asswarn(NULL, #expr, __FILE__, __LINE__))
@@ -245,6 +249,11 @@ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
#endif /* XFS_WARN */
#endif /* DEBUG */
+/*
+ * Use this to catch metadata corruptions that are not caught by block or
+ * structure verifiers. The reason is that the verifiers check corruptions only
+ * within the scope of the object being verified.
+ */
#define XFS_IS_CORRUPT(mp, expr) \
(unlikely(expr) ? xfs_corruption_error(#expr, XFS_ERRLEVEL_LOW, (mp), \
NULL, 0, __FILE__, __LINE__, \
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 90a94a5b6f7e..153f3c378f9f 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -112,6 +112,10 @@ xfs_rtcopy_summary(
error = xfs_rtget_summary(oargs, log, bbno, &sum);
if (error)
goto out;
+ if (XFS_IS_CORRUPT(oargs->mp, sum < 0)) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
if (sum == 0)
continue;
error = xfs_rtmodify_summary(oargs, log, bbno, -sum);
@@ -120,7 +124,6 @@ xfs_rtcopy_summary(
error = xfs_rtmodify_summary(nargs, log, bbno, sum);
if (error)
goto out;
- ASSERT(sum > 0);
}
}
error = 0;
@@ -1047,6 +1050,15 @@ xfs_growfs_rt_bmblock(
*/
xfs_trans_resv_calc(mp, &mp->m_resv);
+ /*
+ * Sync sb counters now to reflect the updated values. Lazy counters are
+ * not always updated and in order to avoid inconsistencies between
+ * frextents and rtextents, it is better to sync the counters.
+ */
+
+ if (xfs_has_lazysbcount(mp))
+ xfs_log_sb(args.tp);
+
error = xfs_trans_commit(args.tp);
if (error)
goto out_free;
@@ -1079,17 +1091,27 @@ xfs_last_rtgroup_extents(
}
/*
- * Calculate the last rbmblock currently used.
+ * This will return the bitmap block number (indexed at 0) that will be
+ * extended/modified. There are 2 cases here:
+ * 1. The size of the rtg is such that it is a multiple of
+ * xfs_rtbitmap_rtx_per_rbmblock() i.e, an integral number of bitmap blocks
+ * are completely filled up. In this case, we should return
+ * 1 + (the last used bitmap block number).
+ * 2. The size of the rtg is not an multiple of xfs_rtbitmap_rtx_per_rbmblock().
+ * Here we will return the block number of last used block number. In this
+ * case, we will modify the last used bitmap block to extend the size of the
+ * rtgroup.
*
* This also deals with the case where there were no rtextents before.
*/
static xfs_fileoff_t
-xfs_last_rt_bmblock(
+xfs_last_rt_bmblock_to_extend(
struct xfs_rtgroup *rtg)
{
struct xfs_mount *mp = rtg_mount(rtg);
xfs_rgnumber_t rgno = rtg_rgno(rtg);
xfs_fileoff_t bmbno = 0;
+ unsigned int mod = 0;
ASSERT(!mp->m_sb.sb_rgcount || rgno >= mp->m_sb.sb_rgcount - 1);
@@ -1097,9 +1119,16 @@ xfs_last_rt_bmblock(
xfs_rtxnum_t nrext = xfs_last_rtgroup_extents(mp);
/* Also fill up the previous block if not entirely full. */
- bmbno = xfs_rtbitmap_blockcount_len(mp, nrext);
- if (xfs_rtx_to_rbmword(mp, nrext) != 0)
- bmbno--;
+ /* We are doing a -1 to convert it to a 0 based index */
+ bmbno = xfs_rtbitmap_blockcount_len(mp, nrext) - 1;
+ div_u64_rem(nrext, xfs_rtbitmap_rtx_per_rbmblock(mp), &mod);
+ /*
+ * mod = 0 means that all the current blocks are full. So
+ * return the next block number to be used for the rtgroup
+ * growth.
+ */
+ if (mod == 0)
+ bmbno++;
}
return bmbno;
@@ -1204,7 +1233,8 @@ xfs_growfs_rtg(
goto out_rele;
}
- for (bmbno = xfs_last_rt_bmblock(rtg); bmbno < bmblocks; bmbno++) {
+ for (bmbno = xfs_last_rt_bmblock_to_extend(rtg); bmbno < bmblocks;
+ bmbno++) {
error = xfs_growfs_rt_bmblock(rtg, nrblocks, rextsize, bmbno);
if (error)
goto out_error;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 017db0361cd8..c13d600732c9 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -42,7 +42,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{ "xstrat", xfsstats_offset(xs_write_calls) },
{ "rw", xfsstats_offset(xs_attr_get) },
{ "attr", xfsstats_offset(xs_iflush_count)},
- { "icluster", xfsstats_offset(vn_active) },
+ { "icluster", xfsstats_offset(xs_inodes_active) },
{ "vnodes", xfsstats_offset(xb_get) },
{ "buf", xfsstats_offset(xs_abtb_2) },
{ "abtb2", xfsstats_offset(xs_abtc_2) },
@@ -59,7 +59,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{ "rtrefcntbt", xfsstats_offset(xs_qm_dqreclaims)},
/* we print both series of quota information together */
{ "qm", xfsstats_offset(xs_gc_read_calls)},
- { "zoned", xfsstats_offset(__pad1)},
+ { "zoned", xfsstats_offset(xs_inodes_meta)},
+ { "metafile", xfsstats_offset(xs_xstrat_bytes)},
};
/* Loop over all stats groups */
@@ -99,16 +100,20 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
void xfs_stats_clearall(struct xfsstats __percpu *stats)
{
+ uint32_t xs_inodes_active, xs_inodes_meta;
int c;
- uint32_t vn_active;
xfs_notice(NULL, "Clearing xfsstats");
for_each_possible_cpu(c) {
preempt_disable();
- /* save vn_active, it's a universal truth! */
- vn_active = per_cpu_ptr(stats, c)->s.vn_active;
+ /*
+ * Save the active / meta inode counters, as they are stateful.
+ */
+ xs_inodes_active = per_cpu_ptr(stats, c)->s.xs_inodes_active;
+ xs_inodes_meta = per_cpu_ptr(stats, c)->s.xs_inodes_meta;
memset(per_cpu_ptr(stats, c), 0, sizeof(*stats));
- per_cpu_ptr(stats, c)->s.vn_active = vn_active;
+ per_cpu_ptr(stats, c)->s.xs_inodes_active = xs_inodes_active;
+ per_cpu_ptr(stats, c)->s.xs_inodes_meta = xs_inodes_meta;
preempt_enable();
}
}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 153d2381d0a8..57c32b86c358 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -100,14 +100,14 @@ struct __xfsstats {
uint32_t xs_iflush_count;
uint32_t xs_icluster_flushcnt;
uint32_t xs_icluster_flushinode;
- uint32_t vn_active; /* # vnodes not on free lists */
- uint32_t vn_alloc; /* # times vn_alloc called */
- uint32_t vn_get; /* # times vn_get called */
- uint32_t vn_hold; /* # times vn_hold called */
- uint32_t vn_rele; /* # times vn_rele called */
- uint32_t vn_reclaim; /* # times vn_reclaim called */
- uint32_t vn_remove; /* # times vn_remove called */
- uint32_t vn_free; /* # times vn_free called */
+ uint32_t xs_inodes_active;
+ uint32_t __unused_vn_alloc;
+ uint32_t __unused_vn_get;
+ uint32_t __unused_vn_hold;
+ uint32_t xs_inode_destroy;
+ uint32_t xs_inode_destroy2; /* same as xs_inode_destroy */
+ uint32_t xs_inode_mark_reclaimable;
+ uint32_t __unused_vn_free;
uint32_t xb_get;
uint32_t xb_create;
uint32_t xb_get_locked;
@@ -142,7 +142,8 @@ struct __xfsstats {
uint32_t xs_gc_read_calls;
uint32_t xs_gc_write_calls;
uint32_t xs_gc_zone_reset_calls;
- uint32_t __pad1;
+/* Metafile counters */
+ uint32_t xs_inodes_meta;
/* Extra precision counters */
uint64_t xs_xstrat_bytes;
uint64_t xs_write_bytes;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index abc45f860a73..f8de44443e81 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -712,8 +712,8 @@ xfs_fs_destroy_inode(
trace_xfs_destroy_inode(ip);
ASSERT(!rwsem_is_locked(&inode->i_rwsem));
- XFS_STATS_INC(ip->i_mount, vn_rele);
- XFS_STATS_INC(ip->i_mount, vn_remove);
+ XFS_STATS_INC(ip->i_mount, xs_inode_destroy);
+ XFS_STATS_INC(ip->i_mount, xs_inode_destroy2);
xfs_inode_mark_reclaimable(ip);
}
diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c
index 069cd371619d..8bbd4ec567f8 100644
--- a/fs/xfs/xfs_verify_media.c
+++ b/fs/xfs/xfs_verify_media.c
@@ -122,7 +122,7 @@ xfs_verify_report_losses(
error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
if (error) {
- xfs_perag_put(pag);
+ xfs_perag_rele(pag);
break;
}
@@ -158,7 +158,7 @@ xfs_verify_report_losses(
if (rtg)
xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
if (error) {
- xfs_group_put(xg);
+ xfs_group_rele(xg);
break;
}
}
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 67e0c8f5800f..e3d19b6dc64a 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -78,7 +78,7 @@ xfs_zone_account_reclaimable(
struct xfs_rtgroup *rtg,
uint32_t freed)
{
- struct xfs_group *xg = &rtg->rtg_group;
+ struct xfs_group *xg = rtg_group(rtg);
struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_zone_info *zi = mp->m_zone_info;
uint32_t used = rtg_rmap(rtg)->i_used_blocks;
@@ -759,7 +759,7 @@ xfs_zone_alloc_blocks(
trace_xfs_zone_alloc_blocks(oz, allocated, count_fsb);
- *sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0);
+ *sector = xfs_gbno_to_daddr(rtg_group(rtg), 0);
*is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *sector);
if (!*is_seq)
*sector += XFS_FSB_TO_BB(mp, allocated);
@@ -1080,7 +1080,7 @@ xfs_init_zone(
if (write_pointer == 0) {
/* zone is empty */
atomic_inc(&zi->zi_nr_free_zones);
- xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_FREE);
+ xfs_group_set_mark(rtg_group(rtg), XFS_RTG_FREE);
iz->available += rtg_blocks(rtg);
} else if (write_pointer < rtg_blocks(rtg)) {
/* zone is open */
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 48c6cf584447..7efeecd2d85f 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -627,7 +627,7 @@ xfs_zone_gc_alloc_blocks(
if (!*count_fsb)
return NULL;
- *daddr = xfs_gbno_to_daddr(&oz->oz_rtg->rtg_group, 0);
+ *daddr = xfs_gbno_to_daddr(rtg_group(oz->oz_rtg), 0);
*is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr);
if (!*is_seq)
*daddr += XFS_FSB_TO_BB(mp, oz->oz_allocated);
@@ -702,7 +702,7 @@ xfs_zone_gc_start_chunk(
chunk->data = data;
chunk->oz = oz;
chunk->victim_rtg = iter->victim_rtg;
- atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+ atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref);
atomic_inc(&chunk->victim_rtg->rtg_gccount);
bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
@@ -788,7 +788,7 @@ xfs_zone_gc_split_write(
atomic_inc(&chunk->oz->oz_ref);
split_chunk->victim_rtg = chunk->victim_rtg;
- atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+ atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref);
atomic_inc(&chunk->victim_rtg->rtg_gccount);
chunk->offset += split_len;
@@ -888,7 +888,7 @@ xfs_zone_gc_finish_reset(
goto out;
}
- xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_FREE);
+ xfs_group_set_mark(rtg_group(rtg), XFS_RTG_FREE);
atomic_inc(&zi->zi_nr_free_zones);
xfs_zoned_add_available(mp, rtg_blocks(rtg));
@@ -917,7 +917,7 @@ xfs_submit_zone_reset_bio(
XFS_STATS_INC(mp, xs_gc_zone_reset_calls);
- bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0);
+ bio->bi_iter.bi_sector = xfs_gbno_to_daddr(rtg_group(rtg), 0);
if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) {
/*
* Also use the bio to drive the state machine when neither
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b78b53198a2e..05b34a6355b0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -124,7 +124,7 @@ struct bpf_map_ops {
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
- int (*map_check_btf)(const struct bpf_map *map,
+ int (*map_check_btf)(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
@@ -656,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
map->ops->map_seq_show_elem;
}
-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 85efa9772530..8157e8da61d4 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -176,7 +176,7 @@ u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
void bpf_local_storage_map_free(struct bpf_map *map,
struct bpf_local_storage_cache *cache);
-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+int bpf_local_storage_map_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index e45162ef59bb..4ce0d27f8ea2 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -14,6 +14,8 @@ struct bpf_mem_alloc {
struct obj_cgroup *objcg;
bool percpu;
struct work_struct work;
+ void (*dtor_ctx_free)(void *ctx);
+ void *dtor_ctx;
};
/* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects.
@@ -32,6 +34,10 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
/* The percpu allocation with a specific unit size. */
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
+void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma,
+ void (*dtor)(void *obj, void *ctx),
+ void (*dtor_ctx_free)(void *ctx),
+ void *ctx);
/* Check the allocation size for kmalloc equivalent allocator */
int bpf_mem_alloc_check_size(bool percpu, size_t size);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 90536b2bc42e..51ef13ed756e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -339,8 +339,11 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde
{
return folio_alloc_noprof(gfp, order);
}
-#define vma_alloc_folio_noprof(gfp, order, vma, addr) \
- folio_alloc_noprof(gfp, order)
+static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ return folio_alloc_noprof(gfp, order);
+}
#endif
#define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 814bb2892f99..6c75df30a281 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -139,6 +139,8 @@ enum {
* %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
*
* %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension.
+ * mark_obj_codetag_empty() should be called upon freeing for objects allocated
+ * with this flag to indicate that their NULL tags are expected and normal.
*/
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index 35227d47cfc9..dc272b514a01 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -16,22 +16,26 @@
*/
#define INDIRECT_CALL_1(f, f1, ...) \
({ \
- likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \
+ typeof(f) __f1 = (f); \
+ likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__); \
})
#define INDIRECT_CALL_2(f, f2, f1, ...) \
({ \
- likely(f == f2) ? f2(__VA_ARGS__) : \
- INDIRECT_CALL_1(f, f1, __VA_ARGS__); \
+ typeof(f) __f2 = (f); \
+ likely(__f2 == f2) ? f2(__VA_ARGS__) : \
+ INDIRECT_CALL_1(__f2, f1, __VA_ARGS__); \
})
#define INDIRECT_CALL_3(f, f3, f2, f1, ...) \
({ \
- likely(f == f3) ? f3(__VA_ARGS__) : \
- INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \
+ typeof(f) __f3 = (f); \
+ likely(__f3 == f3) ? f3(__VA_ARGS__) : \
+ INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__); \
})
#define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \
({ \
- likely(f == f4) ? f4(__VA_ARGS__) : \
- INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \
+ typeof(f) __f4 = (f); \
+ likely(__f4 == f4) ? f4(__VA_ARGS__) : \
+ INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__); \
})
#define INDIRECT_CALLABLE_DECLARE(f) f
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 70c0948f978e..0225121f3013 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -394,6 +394,7 @@
#define GITS_TYPER_VLPIS (1UL << 1)
#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4
#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS GENMASK_ULL(12, 8)
#define GITS_TYPER_IDBITS_SHIFT 8
#define GITS_TYPER_DEVBITS_SHIFT 13
#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13)
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index fdef2c155c27..d1c3d4941854 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -434,8 +434,44 @@ extern unsigned long preset_lpj;
/*
* Convert various time units to each other:
*/
-extern unsigned int jiffies_to_msecs(const unsigned long j);
-extern unsigned int jiffies_to_usecs(const unsigned long j);
+
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+/**
+ * jiffies_to_msecs - Convert jiffies to milliseconds
+ * @j: jiffies value
+ *
+ * This inline version takes care of HZ in {100,250,1000}.
+ *
+ * Return: milliseconds value
+ */
+static inline unsigned int jiffies_to_msecs(const unsigned long j)
+{
+ return (MSEC_PER_SEC / HZ) * j;
+}
+#else
+unsigned int jiffies_to_msecs(const unsigned long j);
+#endif
+
+#if !(USEC_PER_SEC % HZ)
+/**
+ * jiffies_to_usecs - Convert jiffies to microseconds
+ * @j: jiffies value
+ *
+ * Return: microseconds value
+ */
+static inline unsigned int jiffies_to_usecs(const unsigned long j)
+{
+ /*
+ * Hz usually doesn't go much further MSEC_PER_SEC.
+ * jiffies_to_usecs() and usecs_to_jiffies() depend on that.
+ */
+ BUILD_BUG_ON(HZ > USEC_PER_SEC);
+
+ return (USEC_PER_SEC / HZ) * j;
+}
+#else
+unsigned int jiffies_to_usecs(const unsigned long j);
+#endif
/**
* jiffies_to_nsecs - Convert jiffies to nanoseconds
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c92c1149ee6e..a01a474719a7 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -7,6 +7,24 @@
struct mm_struct;
+/* opaque kthread data */
+struct kthread;
+
+/*
+ * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
+ * always remain a kthread. For kthreads p->worker_private always
+ * points to a struct kthread. For tasks that are not kthreads
+ * p->worker_private is used to point to other things.
+ *
+ * Return NULL for any task that is not a kthread.
+ */
+static inline struct kthread *tsk_is_kthread(struct task_struct *p)
+{
+ if (p->flags & PF_KTHREAD)
+ return p->worker_private;
+ return NULL;
+}
+
__printf(4, 5)
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
void *data,
@@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k);
void kthread_unpark(struct task_struct *k);
void kthread_parkme(void);
-void kthread_exit(long result) __noreturn;
+#define kthread_exit(result) do_exit(result)
void kthread_complete_and_exit(struct completion *, long) __noreturn;
int kthreads_update_housekeeping(void);
+void kthread_do_exit(struct kthread *, long);
int kthreadd(void *unused);
extern struct task_struct *kthreadd_task;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dde605cb894e..34759a262b28 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -253,7 +253,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
union kvm_mmu_notifier_arg {
unsigned long attributes;
};
@@ -275,7 +274,6 @@ struct kvm_gfn_range {
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-#endif
enum {
OUTSIDE_GUEST_MODE,
@@ -849,13 +847,12 @@ struct kvm {
struct hlist_head irq_ack_notifier_list;
#endif
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
struct mmu_notifier mmu_notifier;
unsigned long mmu_invalidate_seq;
long mmu_invalidate_in_progress;
gfn_t mmu_invalidate_range_start;
gfn_t mmu_invalidate_range_end;
-#endif
+
struct list_head devices;
u64 manual_dirty_log_protect;
struct dentry *debugfs_dentry;
@@ -2118,7 +2115,6 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
extern const struct kvm_stats_header kvm_vcpu_stats_header;
extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
{
if (unlikely(kvm->mmu_invalidate_in_progress))
@@ -2196,7 +2192,6 @@ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm,
return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq;
}
-#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index fe82a6c3005f..dd11fdc76a5f 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -23,8 +23,11 @@ struct file;
/**
* struct liveupdate_file_op_args - Arguments for file operation callbacks.
* @handler: The file handler being called.
- * @retrieved: The retrieve status for the 'can_finish / finish'
- * operation.
+ * @retrieve_status: The retrieve status for the 'can_finish / finish'
+ * operation. A value of 0 means the retrieve has not been
+ * attempted, a positive value means the retrieve was
+ * successful, and a negative value means the retrieve failed,
+ * and the value is the error code of the call.
* @file: The file object. For retrieve: [OUT] The callback sets
* this to the new file. For other ops: [IN] The caller sets
* this to the file being operated on.
@@ -40,7 +43,7 @@ struct file;
*/
struct liveupdate_file_op_args {
struct liveupdate_file_handler *handler;
- bool retrieved;
+ int retrieve_status;
struct file *file;
u64 serialized_data;
void *private_data;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index e0e2c265e5d1..ba84f02c2a10 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -486,14 +486,12 @@ struct mmc_host {
struct mmc_ios ios; /* current io bus settings */
+ bool claimed; /* host exclusively claimed */
+
/* group bitfields together to minimize padding */
unsigned int use_spi_crc:1;
- unsigned int claimed:1; /* host exclusively claimed */
unsigned int doing_init_tune:1; /* initial tuning in progress */
- unsigned int can_retune:1; /* re-tuning can be used */
unsigned int doing_retune:1; /* re-tuning in progress */
- unsigned int retune_now:1; /* do re-tuning at next req */
- unsigned int retune_paused:1; /* re-tuning is temporarily disabled */
unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */
unsigned int can_dma_map_merge:1; /* merging can be used */
unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */
@@ -508,6 +506,9 @@ struct mmc_host {
int rescan_disable; /* disable card detection */
int rescan_entered; /* used with nonremovable devices */
+ bool can_retune; /* re-tuning can be used */
+ bool retune_now; /* do re-tuning at next req */
+ bool retune_paused; /* re-tuning is temporarily disabled */
int need_retune; /* re-tuning is needed */
int hold_retune; /* hold off re-tuning */
unsigned int retune_period; /* re-tuning period in secs */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d4e6e00bb90a..67e25f6d15a4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4711,7 +4711,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
{
spin_lock(&txq->_xmit_lock);
- /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+ /* Pairs with READ_ONCE() in netif_tx_owned() */
WRITE_ONCE(txq->xmit_lock_owner, cpu);
}
@@ -4729,7 +4729,7 @@ static inline void __netif_tx_release(struct netdev_queue *txq)
static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
{
spin_lock_bh(&txq->_xmit_lock);
- /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+ /* Pairs with READ_ONCE() in netif_tx_owned() */
WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
}
@@ -4738,7 +4738,7 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq)
bool ok = spin_trylock(&txq->_xmit_lock);
if (likely(ok)) {
- /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+ /* Pairs with READ_ONCE() in netif_tx_owned() */
WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
}
return ok;
@@ -4746,14 +4746,14 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq)
static inline void __netif_tx_unlock(struct netdev_queue *txq)
{
- /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+ /* Pairs with READ_ONCE() in netif_tx_owned() */
WRITE_ONCE(txq->xmit_lock_owner, -1);
spin_unlock(&txq->_xmit_lock);
}
static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
{
- /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+ /* Pairs with READ_ONCE() in netif_tx_owned() */
WRITE_ONCE(txq->xmit_lock_owner, -1);
spin_unlock_bh(&txq->_xmit_lock);
}
@@ -4846,6 +4846,23 @@ static inline void netif_tx_disable(struct net_device *dev)
local_bh_enable();
}
+#ifndef CONFIG_PREEMPT_RT
+static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu)
+{
+ /* Other cpus might concurrently change txq->xmit_lock_owner
+ * to -1 or to their cpu id, but not to our id.
+ */
+ return READ_ONCE(txq->xmit_lock_owner) == cpu;
+}
+
+#else
+static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu)
+{
+ return rt_mutex_owner(&txq->_xmit_lock.lock) == current;
+}
+
+#endif
+
static inline void netif_addr_lock(struct net_device *dev)
{
unsigned char nest_level = 0;
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 825f5865bfc5..c8e227a3f9e2 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns)
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
+bool may_see_all_namespaces(void);
+
static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
{
return atomic_read(&ns->__ns_ref_active);
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index eddd987a8513..a8cb6319b4fb 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -42,7 +42,7 @@
* both the type-agnostic benefits of the macros while also being able to
* enforce that the return value is, in fact, checked.
*/
-static inline bool __must_check __must_check_overflow(bool overflow)
+static __always_inline bool __must_check __must_check_overflow(bool overflow)
{
return unlikely(overflow);
}
@@ -327,7 +327,7 @@ static inline bool __must_check __must_check_overflow(bool overflow)
* with any overflow causing the return value to be SIZE_MAX. The
* lvalue must be size_t to avoid implicit type conversion.
*/
-static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
+static __always_inline size_t __must_check size_mul(size_t factor1, size_t factor2)
{
size_t bytes;
@@ -346,7 +346,7 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
* with any overflow causing the return value to be SIZE_MAX. The
* lvalue must be size_t to avoid implicit type conversion.
*/
-static inline size_t __must_check size_add(size_t addend1, size_t addend2)
+static __always_inline size_t __must_check size_add(size_t addend1, size_t addend2)
{
size_t bytes;
@@ -367,7 +367,7 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2)
* argument may be SIZE_MAX (or the result with be forced to SIZE_MAX).
* The lvalue must be size_t to avoid implicit type conversion.
*/
-static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
+static __always_inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
{
size_t bytes;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 41037c513f06..64921b10ac74 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -545,22 +545,10 @@ static inline int pm_runtime_resume_and_get(struct device *dev)
*
* Decrement the runtime PM usage counter of @dev and if it turns out to be
* equal to 0, queue up a work item for @dev like in pm_request_idle().
- *
- * Return:
- * * 1: Success. Usage counter dropped to zero, but device was already suspended.
- * * 0: Success.
- * * -EINVAL: Runtime PM error.
- * * -EACCES: Runtime PM disabled.
- * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status
- * change ongoing.
- * * -EBUSY: Runtime PM child_count non-zero.
- * * -EPERM: Device PM QoS resume latency 0.
- * * -EINPROGRESS: Suspend already in progress.
- * * -ENOSYS: CONFIG_PM not enabled.
*/
-static inline int pm_runtime_put(struct device *dev)
+static inline void pm_runtime_put(struct device *dev)
{
- return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
+ __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
}
/**
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 876358cfe1b1..d862fa610270 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -248,6 +248,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node);
int ring_buffer_map(struct trace_buffer *buffer, int cpu,
struct vm_area_struct *vma);
+void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
#endif /* _LINUX_RING_BUFFER_H */
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 7a01a0760405..b9d62fc2140d 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
t->rseq = current->rseq;
}
+/*
+ * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq
+ * registration. This is the active rseq area size rounded up to next
+ * power of 2, which guarantees that the rseq structure will always be
+ * aligned on the nearest power of two large enough to contain it, even
+ * as it grows.
+ */
+static inline unsigned int rseq_alloc_align(void)
+{
+ return 1U << get_count_order(offsetof(struct rseq, end));
+}
+
#else /* CONFIG_RSEQ */
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index cbc4a791618b..c6831c93cd6e 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -216,10 +216,10 @@ efault:
}
#else /* CONFIG_RSEQ_SLICE_EXTENSION */
-static inline bool rseq_slice_extension_enabled(void) { return false; }
-static inline bool rseq_arm_slice_extension_timer(void) { return false; }
-static inline void rseq_slice_clear_grant(struct task_struct *t) { }
-static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
+static __always_inline bool rseq_slice_extension_enabled(void) { return false; }
+static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; }
+static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { }
+static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 074ad4ef3d81..a7b4a980eb2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -579,6 +579,7 @@ struct sched_entity {
u64 deadline;
u64 min_vruntime;
u64 min_slice;
+ u64 max_slice;
struct list_head group_node;
unsigned char on_rq;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index a5a5e4108ae5..15a60b501b95 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -517,18 +517,6 @@ void kfree_sensitive(const void *objp);
DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))
-/**
- * ksize - Report actual allocation size of associated object
- *
- * @objp: Pointer returned from a prior kmalloc()-family allocation.
- *
- * This should not be used for writing beyond the originally requested
- * allocation size. Either use krealloc() or round up the allocation size
- * with kmalloc_size_roundup() prior to allocation. If this is used to
- * access beyond the originally requested allocation size, UBSAN_BOUNDS
- * and/or FORTIFY_SOURCE may trip, since they only know about the
- * originally allocated size via the __alloc_size attribute.
- */
size_t ksize(const void *objp);
#ifdef CONFIG_PRINTK
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index fa4654ffb621..ca2cfec8de08 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -131,4 +131,7 @@ static inline bool tnum_subreg_is_const(struct tnum a)
return !(tnum_subreg(a)).mask;
}
+/* Returns the smallest member of t larger than z */
+u64 tnum_step(struct tnum t, u64 z);
+
#endif /* _LINUX_TNUM_H */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1f3804245c06..809e4f7dfdbd 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -647,36 +647,22 @@ static inline void user_access_restore(unsigned long flags) { }
/* Define RW variant so the below _mode macro expansion works */
#define masked_user_rw_access_begin(u) masked_user_access_begin(u)
#define user_rw_access_begin(u, s) user_access_begin(u, s)
-#define user_rw_access_end() user_access_end()
/* Scoped user access */
-#define USER_ACCESS_GUARD(_mode) \
-static __always_inline void __user * \
-class_user_##_mode##_begin(void __user *ptr) \
-{ \
- return ptr; \
-} \
- \
-static __always_inline void \
-class_user_##_mode##_end(void __user *ptr) \
-{ \
- user_##_mode##_access_end(); \
-} \
- \
-DEFINE_CLASS(user_ ##_mode## _access, void __user *, \
- class_user_##_mode##_end(_T), \
- class_user_##_mode##_begin(ptr), void __user *ptr) \
- \
-static __always_inline class_user_##_mode##_access_t \
-class_user_##_mode##_access_ptr(void __user *scope) \
-{ \
- return scope; \
-}
-USER_ACCESS_GUARD(read)
-USER_ACCESS_GUARD(write)
-USER_ACCESS_GUARD(rw)
-#undef USER_ACCESS_GUARD
+/* Cleanup wrapper functions */
+static __always_inline void __scoped_user_read_access_end(const void *p)
+{
+ user_read_access_end();
+};
+static __always_inline void __scoped_user_write_access_end(const void *p)
+{
+ user_write_access_end();
+};
+static __always_inline void __scoped_user_rw_access_end(const void *p)
+{
+ user_access_end();
+};
/**
* __scoped_user_access_begin - Start a scoped user access
@@ -750,13 +736,13 @@ USER_ACCESS_GUARD(rw)
*
* Don't use directly. Use scoped_masked_user_$MODE_access() instead.
*/
-#define __scoped_user_access(mode, uptr, size, elbl) \
-for (bool done = false; !done; done = true) \
- for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \
- !done; done = true) \
- for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true) \
- /* Force modified pointer usage within the scope */ \
- for (const typeof(uptr) uptr = _tmpptr; !done; done = true)
+#define __scoped_user_access(mode, uptr, size, elbl) \
+for (bool done = false; !done; done = true) \
+ for (auto _tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \
+ !done; done = true) \
+ /* Force modified pointer usage within the scope */ \
+ for (const auto uptr __cleanup(__scoped_user_##mode##_access_end) = \
+ _tmpptr; !done; done = true)
/**
* scoped_user_read_access_size - Start a scoped user read access with given size
diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h
index 2ca60828f28b..1502b2a355f9 100644
--- a/include/linux/usb/r8152.h
+++ b/include/linux/usb/r8152.h
@@ -32,6 +32,7 @@
#define VENDOR_ID_DLINK 0x2001
#define VENDOR_ID_DELL 0x413c
#define VENDOR_ID_ASUS 0x0b05
+#define VENDOR_ID_TRENDNET 0x20f4
#if IS_REACHABLE(CONFIG_USB_RTL8152)
extern u8 rtl8152_get_version(struct usb_interface *intf);
diff --git a/include/net/act_api.h b/include/net/act_api.h
index e1e8f0f7dacb..d11b79107930 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -70,6 +70,7 @@ struct tc_action {
#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
#define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4))
+#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5))
/* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice.
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 7f8fda2fdfcb..d3520ecfa7f0 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -699,6 +699,7 @@ void bond_debug_register(struct bonding *bond);
void bond_debug_unregister(struct bonding *bond);
void bond_debug_reregister(struct bonding *bond);
const char *bond_mode_name(int mode);
+bool __bond_xdp_check(int mode, int xmit_policy);
bool bond_xdp_check(struct bonding *bond, int mode);
void bond_setup(struct net_device *bond_dev);
unsigned int bond_get_num_tx_queues(void);
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 282e29237d93..c16de5b7963f 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -175,7 +175,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk,
{
if (!net_eq(sock_net(sk), net) ||
sk->sk_family != AF_INET6 ||
- sk->sk_portpair != ports ||
+ READ_ONCE(sk->sk_portpair) != ports ||
!ipv6_addr_equal(&sk->sk_v6_daddr, saddr) ||
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return false;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 8bddf58b1a85..1a47e7ed371a 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -344,7 +344,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk,
int dif, int sdif)
{
if (!net_eq(sock_net(sk), net) ||
- sk->sk_portpair != ports ||
+ READ_ONCE(sk->sk_portpair) != ports ||
sk->sk_addrpair != cookie)
return false;
diff --git a/include/net/ip.h b/include/net/ip.h
index 52264c459357..f39a3787fedd 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
- ipcm->protocol = inet->inet_num;
+ ipcm->protocol = READ_ONCE(inet->inet_num);
}
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index b4495c38e0a0..318593743b6e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net,
siphash_aligned_key_t hash_key;
u32 mp_seed;
- mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
+ mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed);
fib_multipath_hash_construct_key(&hash_key, mp_seed);
return flow_hash_from_keys_seed(keys, &hash_key);
diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h
index 481a7b28e6f2..82b5d21aae87 100644
--- a/include/net/libeth/xsk.h
+++ b/include/net/libeth/xsk.h
@@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp,
* @pending: current number of XSkFQEs to refill
* @thresh: threshold below which the queue is refilled
* @buf_len: HW-writeable length per each buffer
+ * @truesize: step between consecutive buffers, 0 if none exists
* @nid: ID of the closest NUMA node with memory
*/
struct libeth_xskfq {
@@ -614,6 +615,8 @@ struct libeth_xskfq {
u32 thresh;
u32 buf_len;
+ u32 truesize;
+
int nid;
};
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 40e8106e71f0..acfa2ceef9c0 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -309,11 +309,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
* @NFT_ITER_UNSPEC: unspecified, to catch errors
* @NFT_ITER_READ: read-only iteration over set elements
* @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element
*/
enum nft_iter_type {
NFT_ITER_UNSPEC,
NFT_ITER_READ,
NFT_ITER_UPDATE,
+ NFT_ITER_UPDATE_CLONE,
};
struct nft_set;
@@ -1845,6 +1847,11 @@ struct nft_trans_gc {
struct rcu_head rcu;
};
+static inline int nft_trans_gc_space(const struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
static inline void nft_ctx_update(struct nft_ctx *ctx,
const struct nft_trans *trans)
{
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 31c25a6d6acc..c355300893a3 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -781,13 +781,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
+ bool nolock;
for (; i < dev->num_tx_queues; i++) {
qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
if (qdisc) {
+ nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock) {
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+ spin_unlock_bh(&qdisc->seqlock);
+ }
}
}
}
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index cddebafb9f77..6f996229167b 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -5,16 +5,47 @@
#include <linux/types.h>
struct net;
+extern struct net init_net;
+
+union tcp_seq_and_ts_off {
+ struct {
+ u32 seq;
+ u32 ts_off;
+ };
+ u64 hash64;
+};
u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport);
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr);
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport);
-u32 secure_tcpv6_ts_off(const struct net *net,
- const __be32 *saddr, const __be32 *daddr);
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport);
+
+static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport)
+{
+ union tcp_seq_and_ts_off ts;
+
+ ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr,
+ sport, dport);
+
+ return ts.seq;
+}
+
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+ const __be32 *daddr,
+ __be16 sport, __be16 dport);
+
+static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+ __be16 sport, __be16 dport)
+{
+ union tcp_seq_and_ts_off ts;
+
+ ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr,
+ sport, dport);
+ return ts.seq;
+}
#endif /* _NET_SECURE_SEQ */
diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index b147a3bb1a46..e0fded18e18c 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -32,6 +32,7 @@ struct tcf_gate_params {
s32 tcfg_clockid;
size_t num_entries;
struct list_head entries;
+ struct rcu_head rcu;
};
#define GATE_ACT_GATE_OPEN BIT(0)
@@ -39,7 +40,7 @@ struct tcf_gate_params {
struct tcf_gate {
struct tc_action common;
- struct tcf_gate_params param;
+ struct tcf_gate_params __rcu *param;
u8 current_gate_status;
ktime_t current_close_time;
u32 current_entry_octets;
@@ -51,47 +52,65 @@ struct tcf_gate {
#define to_gate(a) ((struct tcf_gate *)a)
+static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a)
+{
+ struct tcf_gate *gact = to_gate(a);
+
+ return rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
+}
+
static inline s32 tcf_gate_prio(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
s32 tcfg_prio;
- tcfg_prio = to_gate(a)->param.tcfg_priority;
+ p = tcf_gate_params_locked(a);
+ tcfg_prio = p->tcfg_priority;
return tcfg_prio;
}
static inline u64 tcf_gate_basetime(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
u64 tcfg_basetime;
- tcfg_basetime = to_gate(a)->param.tcfg_basetime;
+ p = tcf_gate_params_locked(a);
+ tcfg_basetime = p->tcfg_basetime;
return tcfg_basetime;
}
static inline u64 tcf_gate_cycletime(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
u64 tcfg_cycletime;
- tcfg_cycletime = to_gate(a)->param.tcfg_cycletime;
+ p = tcf_gate_params_locked(a);
+ tcfg_cycletime = p->tcfg_cycletime;
return tcfg_cycletime;
}
static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
u64 tcfg_cycletimeext;
- tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext;
+ p = tcf_gate_params_locked(a);
+ tcfg_cycletimeext = p->tcfg_cycletime_ext;
return tcfg_cycletimeext;
}
static inline u32 tcf_gate_num_entries(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
u32 num_entries;
- num_entries = to_gate(a)->param.num_entries;
+ p = tcf_gate_params_locked(a);
+ num_entries = p->num_entries;
return num_entries;
}
@@ -105,7 +124,7 @@ static inline struct action_gate_entry
u32 num_entries;
int i = 0;
- p = &to_gate(a)->param;
+ p = tcf_gate_params_locked(a);
num_entries = p->num_entries;
list_for_each_entry(entry, &p->entries, list)
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index c7f24a2da1ca..24d4d5a62b3c 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -13,15 +13,13 @@ struct tcf_ife_params {
u8 eth_src[ETH_ALEN];
u16 eth_type;
u16 flags;
-
+ struct list_head metalist;
struct rcu_head rcu;
};
struct tcf_ife_info {
struct tc_action common;
struct tcf_ife_params __rcu *params;
- /* list of metaids allowed */
- struct list_head metalist;
};
#define to_ife(a) ((struct tcf_ife_info *)a)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index fea6081cf6c7..3f1fe954e6aa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -43,6 +43,7 @@
#include <net/dst.h>
#include <net/mptcp.h>
#include <net/xfrm.h>
+#include <net/secure_seq.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
@@ -2477,8 +2478,9 @@ struct tcp_request_sock_ops {
struct flowi *fl,
struct request_sock *req,
u32 tw_isn);
- u32 (*init_seq)(const struct sk_buff *skb);
- u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
+ union tcp_seq_and_ts_off (*init_seq_and_ts_off)(
+ const struct net *net,
+ const struct sk_buff *skb);
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl, struct request_sock *req,
struct tcp_fastopen_cookie *foc,
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 242e34f771cc..6b9ebae2dc95 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -51,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
}
+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+ return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool);
+}
+
static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
@@ -122,7 +127,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
goto out;
list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
- list_del(&pos->list_node);
+ list_del_init(&pos->list_node);
xp_free(pos);
}
@@ -157,7 +162,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
frag = list_first_entry_or_null(&xskb->pool->xskb_list,
struct xdp_buff_xsk, list_node);
if (frag) {
- list_del(&frag->list_node);
+ list_del_init(&frag->list_node);
ret = &frag->xdp;
}
@@ -168,7 +173,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
- list_del(&xskb->list_node);
+ list_del_init(&xskb->list_node);
}
static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
@@ -337,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
return 0;
}
+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+ return 0;
+}
+
static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 6de6fd8bd15e..d639ff889e64 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -181,7 +181,7 @@ void rdma_destroy_id(struct rdma_cm_id *id);
*
* It needs to be called before the RDMA identifier is bound
* to an device, which mean it should be called before
- * rdma_bind_addr(), rdma_bind_addr() and rdma_listen().
+ * rdma_bind_addr(), rdma_resolve_addr() and rdma_listen().
*/
int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type);
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 7f93e754da5c..cd7920c81f85 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -440,7 +440,13 @@ TRACE_EVENT(rss_stat,
TP_fast_assign(
__entry->mm_id = mm_ptr_to_hash(mm);
- __entry->curr = !!(current->mm == mm);
+ /*
+ * curr is true if the mm matches the current task's mm_struct.
+ * Since kthreads (PF_KTHREAD) have no mm_struct of their own
+ * but can borrow one via kthread_use_mm(), we must filter them
+ * out to avoid incorrectly attributing the RSS update to them.
+ */
+ __entry->curr = current->mm == mm && !(current->flags & PF_KTHREAD);
__entry->member = member;
__entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member])
<< PAGE_SHIFT);
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 64a382fbc31a..2d366be46a1c 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -57,6 +57,7 @@
EM(netfs_rreq_trace_done, "DONE ") \
EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \
EM(netfs_rreq_trace_free, "FREE ") \
+ EM(netfs_rreq_trace_intr, "INTR ") \
EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \
EM(netfs_rreq_trace_recollect, "RECLLCT") \
EM(netfs_rreq_trace_redirty, "REDIRTY") \
@@ -169,7 +170,8 @@
EM(netfs_sreq_trace_put_oom, "PUT OOM ") \
EM(netfs_sreq_trace_put_wip, "PUT WIP ") \
EM(netfs_sreq_trace_put_work, "PUT WORK ") \
- E_(netfs_sreq_trace_put_terminated, "PUT TERM ")
+ EM(netfs_sreq_trace_put_terminated, "PUT TERM ") \
+ E_(netfs_sreq_trace_see_failed, "SEE FAILED ")
#define netfs_folio_traces \
EM(netfs_folio_is_uptodate, "mod-uptodate") \
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index e527b24bd824..c89aede3cb12 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -401,8 +401,8 @@ extern "C" {
* implementation can multiply the values by 2^6=64. For that reason the padding
* must only contain zeros.
* index 0 = Y plane, [15:0] z:Y [6:10] little endian
- * index 1 = Cr plane, [15:0] z:Cr [6:10] little endian
- * index 2 = Cb plane, [15:0] z:Cb [6:10] little endian
+ * index 1 = Cb plane, [15:0] z:Cb [6:10] little endian
+ * index 2 = Cr plane, [15:0] z:Cr [6:10] little endian
*/
#define DRM_FORMAT_S010 fourcc_code('S', '0', '1', '0') /* 2x2 subsampled Cb (1) and Cr (2) planes 10 bits per channel */
#define DRM_FORMAT_S210 fourcc_code('S', '2', '1', '0') /* 2x1 subsampled Cb (1) and Cr (2) planes 10 bits per channel */
@@ -414,8 +414,8 @@ extern "C" {
* implementation can multiply the values by 2^4=16. For that reason the padding
* must only contain zeros.
* index 0 = Y plane, [15:0] z:Y [4:12] little endian
- * index 1 = Cr plane, [15:0] z:Cr [4:12] little endian
- * index 2 = Cb plane, [15:0] z:Cb [4:12] little endian
+ * index 1 = Cb plane, [15:0] z:Cb [4:12] little endian
+ * index 2 = Cr plane, [15:0] z:Cr [4:12] little endian
*/
#define DRM_FORMAT_S012 fourcc_code('S', '0', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes 12 bits per channel */
#define DRM_FORMAT_S212 fourcc_code('S', '2', '1', '2') /* 2x1 subsampled Cb (1) and Cr (2) planes 12 bits per channel */
@@ -424,8 +424,8 @@ extern "C" {
/*
* 3 plane YCbCr
* index 0 = Y plane, [15:0] Y little endian
- * index 1 = Cr plane, [15:0] Cr little endian
- * index 2 = Cb plane, [15:0] Cb little endian
+ * index 1 = Cb plane, [15:0] Cb little endian
+ * index 2 = Cr plane, [15:0] Cr little endian
*/
#define DRM_FORMAT_S016 fourcc_code('S', '0', '1', '6') /* 2x2 subsampled Cb (1) and Cr (2) planes 16 bits per channel */
#define DRM_FORMAT_S216 fourcc_code('S', '2', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes 16 bits per channel */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ec1c54b5a310..14f634ab9350 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -712,7 +712,7 @@
#define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */
#define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */
#define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x34 /* end of v2 EPs w/ link */
#define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */
#define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */
#define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 863c4a00a66b..f69344fe6c08 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -87,10 +87,17 @@ struct rseq_slice_ctrl {
};
/*
- * struct rseq is aligned on 4 * 8 bytes to ensure it is always
- * contained within a single cache-line.
+ * The original size and alignment of the allocation for struct rseq is
+ * 32 bytes.
*
- * A single struct rseq per thread is allowed.
+ * The allocation size needs to be greater or equal to
+ * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
+ * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
+ *
+ * As an alternative, userspace is allowed to use both the original size
+ * and alignment of 32 bytes for backward compatibility.
+ *
+ * A single active struct rseq registration per thread is allowed.
*/
struct rseq {
/*
@@ -181,9 +188,20 @@ struct rseq {
struct rseq_slice_ctrl slice_ctrl;
/*
+ * Before rseq became extensible, its original size was 32 bytes even
+ * though the active rseq area was only 20 bytes.
+ * Exposing a 32 bytes feature size would make life needlessly painful
+ * for userspace. Therefore, add a reserved byte after byte 32
+ * to bump the rseq feature size from 32 to 33.
+ * The next field to be added to the rseq area will be larger
+ * than one byte, and will replace this reserved byte.
+ */
+ __u8 __reserved;
+
+ /*
* Flexible array member at end of structure, after last feature field.
*/
char end[];
-} __attribute__((aligned(4 * sizeof(__u64))));
+} __attribute__((aligned(32)));
#endif /* _UAPI_LINUX_RSEQ_H */
diff --git a/init/Kconfig b/init/Kconfig
index c25869cf59c1..b55deae9256c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -153,7 +153,7 @@ config CC_HAS_COUNTED_BY_PTR
config CC_HAS_BROKEN_COUNTED_BY_REF
bool
# https://github.com/llvm/llvm-project/issues/182575
- default y if CC_IS_CLANG && CLANG_VERSION < 220000
+ default y if CC_IS_CLANG && CLANG_VERSION < 220100
config CC_HAS_MULTIDIMENSIONAL_NONSTRING
def_bool $(success,echo 'char tag[][4] __attribute__((__nonstring__)) = { };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror)
diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c
index 57ddaf874611..125a81c520a6 100644
--- a/io_uring/cmd_net.c
+++ b/io_uring/cmd_net.c
@@ -146,7 +146,7 @@ static int io_uring_cmd_getsockname(struct socket *sock,
return -EINVAL;
uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
- ulen = u64_to_user_ptr(sqe->addr3);
+ ulen = u64_to_user_ptr(READ_ONCE(sqe->addr3));
peer = READ_ONCE(sqe->optlen);
if (peer > 1)
return -EINVAL;
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 84dda24f3eb2..cb61d4862fc6 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -462,7 +462,7 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
tr->ltimeout = true;
if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
return -EINVAL;
- if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
+ if (get_timespec64(&tr->ts, u64_to_user_ptr(READ_ONCE(sqe->addr2))))
return -EFAULT;
if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
return -EINVAL;
@@ -557,7 +557,7 @@ static int __io_timeout_prep(struct io_kiocb *req,
data->req = req;
data->flags = flags;
- if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
+ if (get_timespec64(&data->ts, u64_to_user_ptr(READ_ONCE(sqe->addr))))
return -EFAULT;
if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 144f30e740e8..f355cf1c1a16 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -303,7 +303,7 @@ static long arena_map_update_elem(struct bpf_map *map, void *key,
return -EOPNOTSUPP;
}
-static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf,
+static int arena_map_check_btf(struct bpf_map *map, const struct btf *btf,
const struct btf_type *key_type, const struct btf_type *value_type)
{
return 0;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 26763df6134a..33de68c95ad8 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -548,7 +548,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
-static int array_map_check_btf(const struct bpf_map *map,
+static int array_map_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index 35e1ddca74d2..b73336c976b7 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -180,7 +180,7 @@ static long bloom_map_update_elem(struct bpf_map *map, void *key,
return -EINVAL;
}
-static int bloom_map_check_btf(const struct bpf_map *map,
+static int bloom_map_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/bpf_insn_array.c b/kernel/bpf/bpf_insn_array.c
index c0286f25ca3c..a2f84afe6f7c 100644
--- a/kernel/bpf/bpf_insn_array.c
+++ b/kernel/bpf/bpf_insn_array.c
@@ -98,7 +98,7 @@ static long insn_array_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
}
-static int insn_array_check_btf(const struct bpf_map *map,
+static int insn_array_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index b28f07d3a0db..9c96a4477f81 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -107,14 +107,12 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage *local_storage;
- /* If RCU Tasks Trace grace period implies RCU grace period, do
- * kfree(), else do kfree_rcu().
+ /*
+ * RCU Tasks Trace grace period implies RCU grace period, do
+ * kfree() directly.
*/
local_storage = container_of(rcu, struct bpf_local_storage, rcu);
- if (rcu_trace_implies_rcu_gp())
- kfree(local_storage);
- else
- kfree_rcu(local_storage, rcu);
+ kfree(local_storage);
}
/* Handle use_kmalloc_nolock == false */
@@ -138,10 +136,11 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
- if (rcu_trace_implies_rcu_gp())
- bpf_local_storage_free_rcu(rcu);
- else
- call_rcu(rcu, bpf_local_storage_free_rcu);
+ /*
+ * RCU Tasks Trace grace period implies RCU grace period, do
+ * kfree() directly.
+ */
+ bpf_local_storage_free_rcu(rcu);
}
static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
@@ -164,16 +163,29 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
bpf_local_storage_free_trace_rcu);
}
-/* rcu tasks trace callback for use_kmalloc_nolock == false */
-static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
+/* rcu callback for use_kmalloc_nolock == false */
+static void __bpf_selem_free_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_map *smap;
selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
- if (rcu_trace_implies_rcu_gp())
- kfree(selem);
- else
- kfree_rcu(selem, rcu);
+ /* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */
+ smap = rcu_dereference_check(SDATA(selem)->smap, 1);
+
+ if (smap)
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ kfree(selem);
+}
+
+/* rcu tasks trace callback for use_kmalloc_nolock == false */
+static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
+{
+ /*
+ * RCU Tasks Trace grace period implies RCU grace period, do
+ * kfree() directly.
+ */
+ __bpf_selem_free_rcu(rcu);
}
/* Handle use_kmalloc_nolock == false */
@@ -181,7 +193,7 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
bool vanilla_rcu)
{
if (vanilla_rcu)
- kfree_rcu(selem, rcu);
+ call_rcu(&selem->rcu, __bpf_selem_free_rcu);
else
call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
}
@@ -195,37 +207,29 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
/* The bpf_local_storage_map_free will wait for rcu_barrier */
smap = rcu_dereference_check(SDATA(selem)->smap, 1);
- if (smap) {
- migrate_disable();
+ if (smap)
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- migrate_enable();
- }
kfree_nolock(selem);
}
static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
- if (rcu_trace_implies_rcu_gp())
- bpf_selem_free_rcu(rcu);
- else
- call_rcu(rcu, bpf_selem_free_rcu);
+ /*
+ * RCU Tasks Trace grace period implies RCU grace period, do
+ * kfree() directly.
+ */
+ bpf_selem_free_rcu(rcu);
}
void bpf_selem_free(struct bpf_local_storage_elem *selem,
bool reuse_now)
{
- struct bpf_local_storage_map *smap;
-
- smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
-
if (!selem->use_kmalloc_nolock) {
/*
* No uptr will be unpin even when reuse_now == false since uptr
* is only supported in task local storage, where
* smap->use_kmalloc_nolock == true.
*/
- if (smap)
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
__bpf_selem_free(selem, reuse_now);
return;
}
@@ -797,7 +801,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
return 0;
}
-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+int bpf_local_storage_map_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
@@ -958,10 +962,9 @@ restart:
*/
synchronize_rcu();
- if (smap->use_kmalloc_nolock) {
- rcu_barrier_tasks_trace();
- rcu_barrier();
- }
+ /* smap remains in use regardless of kmalloc_nolock, so wait unconditionally. */
+ rcu_barrier_tasks_trace();
+ rcu_barrier();
kvfree(smap->buckets);
bpf_map_area_free(smap);
}
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 04171fbc39cb..32b43cb9061b 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -29,6 +29,7 @@
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
+#include <linux/local_lock.h>
#include <linux/completion.h>
#include <trace/events/xdp.h>
#include <linux/btf_ids.h>
@@ -52,6 +53,7 @@ struct xdp_bulk_queue {
struct list_head flush_node;
struct bpf_cpu_map_entry *obj;
unsigned int count;
+ local_lock_t bq_lock;
};
/* Struct for every remote "destination" CPU in map */
@@ -451,6 +453,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
for_each_possible_cpu(i) {
bq = per_cpu_ptr(rcpu->bulkq, i);
bq->obj = rcpu;
+ local_lock_init(&bq->bq_lock);
}
/* Alloc queue */
@@ -722,6 +725,8 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
struct ptr_ring *q;
int i;
+ lockdep_assert_held(&bq->bq_lock);
+
if (unlikely(!bq->count))
return;
@@ -749,11 +754,15 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
}
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
- * Thus, safe percpu variable access.
+ * Thus, safe percpu variable access. PREEMPT_RT relies on
+ * local_lock_nested_bh() to serialise access to the per-CPU bq.
*/
static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
- struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
+ struct xdp_bulk_queue *bq;
+
+ local_lock_nested_bh(&rcpu->bulkq->bq_lock);
+ bq = this_cpu_ptr(rcpu->bulkq);
if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
bq_flush_to_queue(bq);
@@ -774,6 +783,8 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
list_add(&bq->flush_node, flush_list);
}
+
+ local_unlock_nested_bh(&rcpu->bulkq->bq_lock);
}
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
@@ -810,7 +821,9 @@ void __cpu_map_flush(struct list_head *flush_list)
struct xdp_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
+ local_lock_nested_bh(&bq->obj->bulkq->bq_lock);
bq_flush_to_queue(bq);
+ local_unlock_nested_bh(&bq->obj->bulkq->bq_lock);
/* If already running, costs spin_lock_irqsave + smb_mb */
wake_up_process(bq->obj->kthread);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2625601de76e..3d619d01088e 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -45,6 +45,7 @@
* types of devmap; only the lookup and insertion is different.
*/
#include <linux/bpf.h>
+#include <linux/local_lock.h>
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>
@@ -60,6 +61,7 @@ struct xdp_dev_bulk_queue {
struct net_device *dev_rx;
struct bpf_prog *xdp_prog;
unsigned int count;
+ local_lock_t bq_lock;
};
struct bpf_dtab_netdev {
@@ -381,6 +383,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
int to_send = cnt;
int i;
+ lockdep_assert_held(&bq->bq_lock);
+
if (unlikely(!cnt))
return;
@@ -425,10 +429,12 @@ void __dev_flush(struct list_head *flush_list)
struct xdp_dev_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
+ local_lock_nested_bh(&bq->dev->xdp_bulkq->bq_lock);
bq_xmit_all(bq, XDP_XMIT_FLUSH);
bq->dev_rx = NULL;
bq->xdp_prog = NULL;
__list_del_clearprev(&bq->flush_node);
+ local_unlock_nested_bh(&bq->dev->xdp_bulkq->bq_lock);
}
}
@@ -451,12 +457,16 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
/* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu
* variable access, and map elements stick around. See comment above
- * xdp_do_flush() in filter.c.
+ * xdp_do_flush() in filter.c. PREEMPT_RT relies on local_lock_nested_bh()
+ * to serialise access to the per-CPU bq.
*/
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx, struct bpf_prog *xdp_prog)
{
- struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
+ struct xdp_dev_bulk_queue *bq;
+
+ local_lock_nested_bh(&dev->xdp_bulkq->bq_lock);
+ bq = this_cpu_ptr(dev->xdp_bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(bq, 0);
@@ -477,6 +487,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
}
bq->q[bq->count++] = xdpf;
+
+ local_unlock_nested_bh(&dev->xdp_bulkq->bq_lock);
}
static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
@@ -588,18 +600,22 @@ static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifin
}
/* Get ifindex of each upper device. 'indexes' must be able to hold at
- * least MAX_NEST_DEV elements.
- * Returns the number of ifindexes added.
+ * least 'max' elements.
+ * Returns the number of ifindexes added, or -EOVERFLOW if there are too
+ * many upper devices.
*/
-static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+static int get_upper_ifindexes(struct net_device *dev, int *indexes, int max)
{
struct net_device *upper;
struct list_head *iter;
int n = 0;
netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ if (n >= max)
+ return -EOVERFLOW;
indexes[n++] = upper->ifindex;
}
+
return n;
}
@@ -615,7 +631,11 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
int err;
if (exclude_ingress) {
- num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+ num_excluded = get_upper_ifindexes(dev_rx, excluded_devices,
+ ARRAY_SIZE(excluded_devices) - 1);
+ if (num_excluded < 0)
+ return num_excluded;
+
excluded_devices[num_excluded++] = dev_rx->ifindex;
}
@@ -733,7 +753,11 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
int err;
if (exclude_ingress) {
- num_excluded = get_upper_ifindexes(dev, excluded_devices);
+ num_excluded = get_upper_ifindexes(dev, excluded_devices,
+ ARRAY_SIZE(excluded_devices) - 1);
+ if (num_excluded < 0)
+ return num_excluded;
+
excluded_devices[num_excluded++] = dev->ifindex;
}
@@ -1115,8 +1139,13 @@ static int dev_map_notification(struct notifier_block *notifier,
if (!netdev->xdp_bulkq)
return NOTIFY_BAD;
- for_each_possible_cpu(cpu)
- per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
+ for_each_possible_cpu(cpu) {
+ struct xdp_dev_bulk_queue *bq;
+
+ bq = per_cpu_ptr(netdev->xdp_bulkq, cpu);
+ bq->dev = netdev;
+ local_lock_init(&bq->bq_lock);
+ }
break;
case NETDEV_UNREGISTER:
/* This rcu_read_lock/unlock pair is needed because
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3b9d297a53be..bc6bc8bb871d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -125,6 +125,11 @@ struct htab_elem {
char key[] __aligned(8);
};
+struct htab_btf_record {
+ struct btf_record *record;
+ u32 key_size;
+};
+
static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
@@ -457,6 +462,83 @@ static int htab_map_alloc_check(union bpf_attr *attr)
return 0;
}
+static void htab_mem_dtor(void *obj, void *ctx)
+{
+ struct htab_btf_record *hrec = ctx;
+ struct htab_elem *elem = obj;
+ void *map_value;
+
+ if (IS_ERR_OR_NULL(hrec->record))
+ return;
+
+ map_value = htab_elem_value(elem, hrec->key_size);
+ bpf_obj_free_fields(hrec->record, map_value);
+}
+
+static void htab_pcpu_mem_dtor(void *obj, void *ctx)
+{
+ void __percpu *pptr = *(void __percpu **)obj;
+ struct htab_btf_record *hrec = ctx;
+ int cpu;
+
+ if (IS_ERR_OR_NULL(hrec->record))
+ return;
+
+ for_each_possible_cpu(cpu)
+ bpf_obj_free_fields(hrec->record, per_cpu_ptr(pptr, cpu));
+}
+
+static void htab_dtor_ctx_free(void *ctx)
+{
+ struct htab_btf_record *hrec = ctx;
+
+ btf_record_free(hrec->record);
+ kfree(ctx);
+}
+
+static int htab_set_dtor(struct bpf_htab *htab, void (*dtor)(void *, void *))
+{
+ u32 key_size = htab->map.key_size;
+ struct bpf_mem_alloc *ma;
+ struct htab_btf_record *hrec;
+ int err;
+
+ /* No need for dtors. */
+ if (IS_ERR_OR_NULL(htab->map.record))
+ return 0;
+
+ hrec = kzalloc(sizeof(*hrec), GFP_KERNEL);
+ if (!hrec)
+ return -ENOMEM;
+ hrec->key_size = key_size;
+ hrec->record = btf_record_dup(htab->map.record);
+ if (IS_ERR(hrec->record)) {
+ err = PTR_ERR(hrec->record);
+ kfree(hrec);
+ return err;
+ }
+ ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma;
+ bpf_mem_alloc_set_dtor(ma, dtor, htab_dtor_ctx_free, hrec);
+ return 0;
+}
+
+static int htab_map_check_btf(struct bpf_map *map, const struct btf *btf,
+ const struct btf_type *key_type, const struct btf_type *value_type)
+{
+ struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+ if (htab_is_prealloc(htab))
+ return 0;
+ /*
+ * We must set the dtor using this callback, as map's BTF record is not
+ * populated in htab_map_alloc(), so it will always appear as NULL.
+ */
+ if (htab_is_percpu(htab))
+ return htab_set_dtor(htab, htab_pcpu_mem_dtor);
+ else
+ return htab_set_dtor(htab, htab_mem_dtor);
+}
+
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -2281,6 +2363,7 @@ const struct bpf_map_ops htab_map_ops = {
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_check_btf = htab_map_check_btf,
.map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab),
.map_btf_id = &htab_map_btf_ids[0],
@@ -2303,6 +2386,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_check_btf = htab_map_check_btf,
.map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_lru),
.map_btf_id = &htab_map_btf_ids[0],
@@ -2482,6 +2566,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_check_btf = htab_map_check_btf,
.map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_percpu),
.map_btf_id = &htab_map_btf_ids[0],
@@ -2502,6 +2587,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_check_btf = htab_map_check_btf,
.map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_lru_percpu),
.map_btf_id = &htab_map_btf_ids[0],
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 1ccbf28b2ad9..8fca0c64f7b1 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -364,7 +364,7 @@ static long cgroup_storage_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
}
-static int cgroup_storage_check_btf(const struct bpf_map *map,
+static int cgroup_storage_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1adeb4d3b8cf..0f57608b385d 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -751,7 +751,7 @@ free_stack:
return err;
}
-static int trie_check_btf(const struct bpf_map *map,
+static int trie_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index bd45dda9dc35..682a9f34214b 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -102,6 +102,8 @@ struct bpf_mem_cache {
int percpu_size;
bool draining;
struct bpf_mem_cache *tgt;
+ void (*dtor)(void *obj, void *ctx);
+ void *dtor_ctx;
/* list of objects to be freed after RCU GP */
struct llist_head free_by_rcu;
@@ -260,12 +262,14 @@ static void free_one(void *obj, bool percpu)
kfree(obj);
}
-static int free_all(struct llist_node *llnode, bool percpu)
+static int free_all(struct bpf_mem_cache *c, struct llist_node *llnode, bool percpu)
{
struct llist_node *pos, *t;
int cnt = 0;
llist_for_each_safe(pos, t, llnode) {
+ if (c->dtor)
+ c->dtor((void *)pos + LLIST_NODE_SZ, c->dtor_ctx);
free_one(pos, percpu);
cnt++;
}
@@ -276,7 +280,7 @@ static void __free_rcu(struct rcu_head *head)
{
struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace);
- free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
+ free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
atomic_set(&c->call_rcu_ttrace_in_progress, 0);
}
@@ -308,7 +312,7 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c)
if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) {
if (unlikely(READ_ONCE(c->draining))) {
llnode = llist_del_all(&c->free_by_rcu_ttrace);
- free_all(llnode, !!c->percpu_size);
+ free_all(c, llnode, !!c->percpu_size);
}
return;
}
@@ -417,7 +421,7 @@ static void check_free_by_rcu(struct bpf_mem_cache *c)
dec_active(c, &flags);
if (unlikely(READ_ONCE(c->draining))) {
- free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
+ free_all(c, llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
atomic_set(&c->call_rcu_in_progress, 0);
} else {
call_rcu_hurry(&c->rcu, __free_by_rcu);
@@ -635,13 +639,13 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
* Except for waiting_for_gp_ttrace list, there are no concurrent operations
* on these lists, so it is safe to use __llist_del_all().
*/
- free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu);
- free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu);
- free_all(__llist_del_all(&c->free_llist), percpu);
- free_all(__llist_del_all(&c->free_llist_extra), percpu);
- free_all(__llist_del_all(&c->free_by_rcu), percpu);
- free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu);
- free_all(llist_del_all(&c->waiting_for_gp), percpu);
+ free_all(c, llist_del_all(&c->free_by_rcu_ttrace), percpu);
+ free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), percpu);
+ free_all(c, __llist_del_all(&c->free_llist), percpu);
+ free_all(c, __llist_del_all(&c->free_llist_extra), percpu);
+ free_all(c, __llist_del_all(&c->free_by_rcu), percpu);
+ free_all(c, __llist_del_all(&c->free_llist_extra_rcu), percpu);
+ free_all(c, llist_del_all(&c->waiting_for_gp), percpu);
}
static void check_mem_cache(struct bpf_mem_cache *c)
@@ -680,6 +684,9 @@ static void check_leaked_objs(struct bpf_mem_alloc *ma)
static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
{
+ /* We can free dtor ctx only once all callbacks are done using it. */
+ if (ma->dtor_ctx_free)
+ ma->dtor_ctx_free(ma->dtor_ctx);
check_leaked_objs(ma);
free_percpu(ma->cache);
free_percpu(ma->caches);
@@ -1014,3 +1021,32 @@ int bpf_mem_alloc_check_size(bool percpu, size_t size)
return 0;
}
+
+void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, void (*dtor)(void *obj, void *ctx),
+ void (*dtor_ctx_free)(void *ctx), void *ctx)
+{
+ struct bpf_mem_caches *cc;
+ struct bpf_mem_cache *c;
+ int cpu, i;
+
+ ma->dtor_ctx_free = dtor_ctx_free;
+ ma->dtor_ctx = ctx;
+
+ if (ma->cache) {
+ for_each_possible_cpu(cpu) {
+ c = per_cpu_ptr(ma->cache, cpu);
+ c->dtor = dtor;
+ c->dtor_ctx = ctx;
+ }
+ }
+ if (ma->caches) {
+ for_each_possible_cpu(cpu) {
+ cc = per_cpu_ptr(ma->caches, cpu);
+ for (i = 0; i < NUM_CACHES; i++) {
+ c = &cc->cache[i];
+ c->dtor = dtor;
+ c->dtor_ctx = ctx;
+ }
+ }
+ }
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0378e83b4099..274039e36465 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1234,7 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
}
EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);
-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 26fbfbb01700..4abc359b3db0 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -269,3 +269,59 @@ struct tnum tnum_bswap64(struct tnum a)
{
return TNUM(swab64(a.value), swab64(a.mask));
}
+
+/* Given tnum t, and a number z such that tmin <= z < tmax, where tmin
+ * is the smallest member of the t (= t.value) and tmax is the largest
+ * member of t (= t.value | t.mask), returns the smallest member of t
+ * larger than z.
+ *
+ * For example,
+ * t = x11100x0
+ * z = 11110001 (241)
+ * result = 11110010 (242)
+ *
+ * Note: if this function is called with z >= tmax, it just returns
+ * early with tmax; if this function is called with z < tmin, the
+ * algorithm already returns tmin.
+ */
+u64 tnum_step(struct tnum t, u64 z)
+{
+ u64 tmax, j, p, q, r, s, v, u, w, res;
+ u8 k;
+
+ tmax = t.value | t.mask;
+
+ /* if z >= largest member of t, return largest member of t */
+ if (z >= tmax)
+ return tmax;
+
+ /* if z < smallest member of t, return smallest member of t */
+ if (z < t.value)
+ return t.value;
+
+ /* keep t's known bits, and match all unknown bits to z */
+ j = t.value | (z & t.mask);
+
+ if (j > z) {
+ p = ~z & t.value & ~t.mask;
+ k = fls64(p); /* k is the most-significant 0-to-1 flip */
+ q = U64_MAX << k;
+ r = q & z; /* positions > k matched to z */
+ s = ~q & t.value; /* positions <= k matched to t.value */
+ v = r | s;
+ res = v;
+ } else {
+ p = z & ~t.value & ~t.mask;
+ k = fls64(p); /* k is the most-significant 1-to-0 flip */
+ q = U64_MAX << k;
+ r = q & t.mask & z; /* unknown positions > k, matched to z */
+ s = q & ~t.mask; /* known positions > k, set to 1 */
+ v = r | s;
+ /* add 1 to unknown positions > k to make value greater than z */
+ u = v + (1ULL << k);
+ /* extract bits in unknown positions > k from u, rest from t.value */
+ w = (u & t.mask) | t.value;
+ res = w;
+ }
+ return res;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bb12ba020649..401d6c4960ec 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2379,6 +2379,9 @@ static void __update_reg32_bounds(struct bpf_reg_state *reg)
static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
+ u64 tnum_next, tmax;
+ bool umin_in_tnum;
+
/* min signed is max(sign bit) | min(other bits) */
reg->smin_value = max_t(s64, reg->smin_value,
reg->var_off.value | (reg->var_off.mask & S64_MIN));
@@ -2388,6 +2391,33 @@ static void __update_reg64_bounds(struct bpf_reg_state *reg)
reg->umin_value = max(reg->umin_value, reg->var_off.value);
reg->umax_value = min(reg->umax_value,
reg->var_off.value | reg->var_off.mask);
+
+ /* Check if u64 and tnum overlap in a single value */
+ tnum_next = tnum_step(reg->var_off, reg->umin_value);
+ umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value;
+ tmax = reg->var_off.value | reg->var_off.mask;
+ if (umin_in_tnum && tnum_next > reg->umax_value) {
+ /* The u64 range and the tnum only overlap in umin.
+ * u64: ---[xxxxxx]-----
+ * tnum: --xx----------x-
+ */
+ ___mark_reg_known(reg, reg->umin_value);
+ } else if (!umin_in_tnum && tnum_next == tmax) {
+ /* The u64 range and the tnum only overlap in the maximum value
+ * represented by the tnum, called tmax.
+ * u64: ---[xxxxxx]-----
+ * tnum: xx-----x--------
+ */
+ ___mark_reg_known(reg, tmax);
+ } else if (!umin_in_tnum && tnum_next <= reg->umax_value &&
+ tnum_step(reg->var_off, tnum_next) > reg->umax_value) {
+ /* The u64 range and the tnum only overlap in between umin
+ * (excluded) and umax.
+ * u64: ---[xxxxxx]-----
+ * tnum: xx----x-------x-
+ */
+ ___mark_reg_known(reg, tnum_next);
+ }
}
static void __update_reg_bounds(struct bpf_reg_state *reg)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c22cda7766d8..be1d71dda317 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2608,6 +2608,7 @@ static void cgroup_migrate_add_task(struct task_struct *task,
mgctx->tset.nr_tasks++;
+ css_set_skip_task_iters(cset, task);
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node,
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 9faf34377a88..e200de7c60b6 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -62,6 +62,75 @@ static const char * const perr_strings[] = {
};
/*
+ * CPUSET Locking Convention
+ * -------------------------
+ *
+ * Below are the four global/local locks guarding cpuset structures in lock
+ * acquisition order:
+ * - cpuset_top_mutex
+ * - cpu_hotplug_lock (cpus_read_lock/cpus_write_lock)
+ * - cpuset_mutex
+ * - callback_lock (raw spinlock)
+ *
+ * As cpuset will now indirectly flush a number of different workqueues in
+ * housekeeping_update() to update housekeeping cpumasks when the set of
+ * isolated CPUs is going to be changed, it may be vulnerable to deadlock
+ * if we hold cpus_read_lock while calling into housekeeping_update().
+ *
+ * The first cpuset_top_mutex will be held except when calling into
+ * cpuset_handle_hotplug() from the CPU hotplug code where cpus_write_lock
+ * and cpuset_mutex will be held instead. The main purpose of this mutex
+ * is to prevent regular cpuset control file write actions from interfering
+ * with the call to housekeeping_update(), though CPU hotplug operation can
+ * still happen in parallel. This mutex also provides protection for some
+ * internal variables.
+ *
+ * A task must hold all the remaining three locks to modify externally visible
+ * or used fields of cpusets, though some of the internally used cpuset fields
+ * and internal variables can be modified without holding callback_lock. If only
+ * reliable read access of the externally used fields are needed, a task can
+ * hold either cpuset_mutex or callback_lock which are exposed to other
+ * external subsystems.
+ *
+ * If a task holds cpu_hotplug_lock and cpuset_mutex, it blocks others,
+ * ensuring that it is the only task able to also acquire callback_lock and
+ * be able to modify cpusets. It can perform various checks on the cpuset
+ * structure first, knowing nothing will change. It can also allocate memory
+ * without holding callback_lock. While it is performing these checks, various
+ * callback routines can briefly acquire callback_lock to query cpusets. Once
+ * it is ready to make the changes, it takes callback_lock, blocking everyone
+ * else.
+ *
+ * Calls to the kernel memory allocator cannot be made while holding
+ * callback_lock which is a spinlock, as the memory allocator may sleep or
+ * call back into cpuset code and acquire callback_lock.
+ *
+ * Now, the task_struct fields mems_allowed and mempolicy may be changed
+ * by other task, we use alloc_lock in the task_struct fields to protect
+ * them.
+ *
+ * The cpuset_common_seq_show() handlers only hold callback_lock across
+ * small pieces of code, such as when reading out possibly multi-word
+ * cpumasks and nodemasks.
+ */
+
+static DEFINE_MUTEX(cpuset_top_mutex);
+static DEFINE_MUTEX(cpuset_mutex);
+
+/*
+ * File level internal variables below follow one of the following exclusion
+ * rules.
+ *
+ * RWCS: Read/write-able by holding either cpus_write_lock (and optionally
+ * cpuset_mutex) or both cpus_read_lock and cpuset_mutex.
+ *
+ * CSCB: Readable by holding either cpuset_mutex or callback_lock. Writable
+ * by holding both cpuset_mutex and callback_lock.
+ *
+ * T: Read/write-able by holding the cpuset_top_mutex.
+ */
+
+/*
* For local partitions, update to subpartitions_cpus & isolated_cpus is done
* in update_parent_effective_cpumask(). For remote partitions, it is done in
* the remote_partition_*() and remote_cpus_update() helpers.
@@ -70,19 +139,22 @@ static const char * const perr_strings[] = {
* Exclusive CPUs distributed out to local or remote sub-partitions of
* top_cpuset
*/
-static cpumask_var_t subpartitions_cpus;
+static cpumask_var_t subpartitions_cpus; /* RWCS */
+
+/*
+ * Exclusive CPUs in isolated partitions (shown in cpuset.cpus.isolated)
+ */
+static cpumask_var_t isolated_cpus; /* CSCB */
/*
- * Exclusive CPUs in isolated partitions
+ * Set if housekeeping cpumasks are to be updated.
*/
-static cpumask_var_t isolated_cpus;
+static bool update_housekeeping; /* RWCS */
/*
- * isolated_cpus updating flag (protected by cpuset_mutex)
- * Set if isolated_cpus is going to be updated in the current
- * cpuset_mutex crtical section.
+ * Copy of isolated_cpus to be passed to housekeeping_update()
*/
-static bool isolated_cpus_updating;
+static cpumask_var_t isolated_hk_cpus; /* T */
/*
* A flag to force sched domain rebuild at the end of an operation.
@@ -98,7 +170,7 @@ static bool isolated_cpus_updating;
* Note that update_relax_domain_level() in cpuset-v1.c can still call
* rebuild_sched_domains_locked() directly without using this flag.
*/
-static bool force_sd_rebuild;
+static bool force_sd_rebuild; /* RWCS */
/*
* Partition root states:
@@ -218,42 +290,6 @@ struct cpuset top_cpuset = {
.partition_root_state = PRS_ROOT,
};
-/*
- * There are two global locks guarding cpuset structures - cpuset_mutex and
- * callback_lock. The cpuset code uses only cpuset_mutex. Other kernel
- * subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
- * structures. Note that cpuset_mutex needs to be a mutex as it is used in
- * paths that rely on priority inheritance (e.g. scheduler - on RT) for
- * correctness.
- *
- * A task must hold both locks to modify cpusets. If a task holds
- * cpuset_mutex, it blocks others, ensuring that it is the only task able to
- * also acquire callback_lock and be able to modify cpusets. It can perform
- * various checks on the cpuset structure first, knowing nothing will change.
- * It can also allocate memory while just holding cpuset_mutex. While it is
- * performing these checks, various callback routines can briefly acquire
- * callback_lock to query cpusets. Once it is ready to make the changes, it
- * takes callback_lock, blocking everyone else.
- *
- * Calls to the kernel memory allocator can not be made while holding
- * callback_lock, as that would risk double tripping on callback_lock
- * from one of the callbacks into the cpuset code from within
- * __alloc_pages().
- *
- * If a task is only holding callback_lock, then it has read-only
- * access to cpusets.
- *
- * Now, the task_struct fields mems_allowed and mempolicy may be changed
- * by other task, we use alloc_lock in the task_struct fields to protect
- * them.
- *
- * The cpuset_common_seq_show() handlers only hold callback_lock across
- * small pieces of code, such as when reading out possibly multi-word
- * cpumasks and nodemasks.
- */
-
-static DEFINE_MUTEX(cpuset_mutex);
-
/**
* cpuset_lock - Acquire the global cpuset mutex
*
@@ -283,6 +319,7 @@ void lockdep_assert_cpuset_lock_held(void)
*/
void cpuset_full_lock(void)
{
+ mutex_lock(&cpuset_top_mutex);
cpus_read_lock();
mutex_lock(&cpuset_mutex);
}
@@ -291,12 +328,14 @@ void cpuset_full_unlock(void)
{
mutex_unlock(&cpuset_mutex);
cpus_read_unlock();
+ mutex_unlock(&cpuset_top_mutex);
}
#ifdef CONFIG_LOCKDEP
bool lockdep_is_cpuset_held(void)
{
- return lockdep_is_held(&cpuset_mutex);
+ return lockdep_is_held(&cpuset_mutex) ||
+ lockdep_is_held(&cpuset_top_mutex);
}
#endif
@@ -961,7 +1000,7 @@ void rebuild_sched_domains_locked(void)
* offline CPUs, a warning is emitted and we return directly to
* prevent the panic.
*/
- for (i = 0; i < ndoms; ++i) {
+ for (i = 0; doms && i < ndoms; i++) {
if (WARN_ON_ONCE(!cpumask_subset(doms[i], cpu_active_mask)))
return;
}
@@ -1161,12 +1200,18 @@ static void reset_partition_data(struct cpuset *cs)
static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus)
{
WARN_ON_ONCE(old_prs == new_prs);
- if (new_prs == PRS_ISOLATED)
+ lockdep_assert_held(&callback_lock);
+ lockdep_assert_held(&cpuset_mutex);
+ if (new_prs == PRS_ISOLATED) {
+ if (cpumask_subset(xcpus, isolated_cpus))
+ return;
cpumask_or(isolated_cpus, isolated_cpus, xcpus);
- else
+ } else {
+ if (!cpumask_intersects(xcpus, isolated_cpus))
+ return;
cpumask_andnot(isolated_cpus, isolated_cpus, xcpus);
-
- isolated_cpus_updating = true;
+ }
+ update_housekeeping = true;
}
/*
@@ -1219,8 +1264,8 @@ static void partition_xcpus_del(int old_prs, struct cpuset *parent,
isolated_cpus_update(old_prs, parent->partition_root_state,
xcpus);
- cpumask_and(xcpus, xcpus, cpu_active_mask);
cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus);
+ cpumask_and(parent->effective_cpus, parent->effective_cpus, cpu_active_mask);
}
/*
@@ -1284,22 +1329,43 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
}
/*
- * update_isolation_cpumasks - Update external isolation related CPU masks
+ * update_hk_sched_domains - Update HK cpumasks & rebuild sched domains
*
- * The following external CPU masks will be updated if necessary:
- * - workqueue unbound cpumask
+ * Update housekeeping cpumasks and rebuild sched domains if necessary.
+ * This should be called at the end of cpuset or hotplug actions.
*/
-static void update_isolation_cpumasks(void)
+static void update_hk_sched_domains(void)
{
- int ret;
-
- if (!isolated_cpus_updating)
- return;
+ if (update_housekeeping) {
+ /* Updating HK cpumasks implies rebuild sched domains */
+ update_housekeeping = false;
+ force_sd_rebuild = true;
+ cpumask_copy(isolated_hk_cpus, isolated_cpus);
- ret = housekeeping_update(isolated_cpus);
- WARN_ON_ONCE(ret < 0);
+ /*
+ * housekeeping_update() is now called without holding
+ * cpus_read_lock and cpuset_mutex. Only cpuset_top_mutex
+ * is still being held for mutual exclusion.
+ */
+ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+ WARN_ON_ONCE(housekeeping_update(isolated_hk_cpus));
+ cpus_read_lock();
+ mutex_lock(&cpuset_mutex);
+ }
+ /* force_sd_rebuild will be cleared in rebuild_sched_domains_locked() */
+ if (force_sd_rebuild)
+ rebuild_sched_domains_locked();
+}
- isolated_cpus_updating = false;
+/*
+ * Work function to invoke update_hk_sched_domains()
+ */
+static void hk_sd_workfn(struct work_struct *work)
+{
+ cpuset_full_lock();
+ update_hk_sched_domains();
+ cpuset_full_unlock();
}
/**
@@ -1450,7 +1516,6 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
cs->remote_partition = true;
cpumask_copy(cs->effective_xcpus, tmp->new_cpus);
spin_unlock_irq(&callback_lock);
- update_isolation_cpumasks();
cpuset_force_rebuild();
cs->prs_err = 0;
@@ -1495,7 +1560,6 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
compute_excpus(cs, cs->effective_xcpus);
reset_partition_data(cs);
spin_unlock_irq(&callback_lock);
- update_isolation_cpumasks();
cpuset_force_rebuild();
/*
@@ -1566,7 +1630,6 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
if (xcpus)
cpumask_copy(cs->exclusive_cpus, xcpus);
spin_unlock_irq(&callback_lock);
- update_isolation_cpumasks();
if (adding || deleting)
cpuset_force_rebuild();
@@ -1910,7 +1973,6 @@ write_error:
partition_xcpus_add(new_prs, parent, tmp->delmask);
spin_unlock_irq(&callback_lock);
- update_isolation_cpumasks();
if ((old_prs != new_prs) && (cmd == partcmd_update))
update_partition_exclusive_flag(cs, new_prs);
@@ -2155,7 +2217,7 @@ get_css:
WARN_ON(!is_in_v2_mode() &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
- cpuset_update_tasks_cpumask(cp, cp->effective_cpus);
+ cpuset_update_tasks_cpumask(cp, tmp->new_cpus);
/*
* On default hierarchy, inherit the CS_SCHED_LOAD_BALANCE
@@ -2878,7 +2940,6 @@ out:
else if (isolcpus_updated)
isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus);
spin_unlock_irq(&callback_lock);
- update_isolation_cpumasks();
/* Force update if switching back to member & update effective_xcpus */
update_cpumasks_hier(cs, &tmpmask, !new_prs);
@@ -3168,9 +3229,8 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
}
free_cpuset(trialcs);
- if (force_sd_rebuild)
- rebuild_sched_domains_locked();
out_unlock:
+ update_hk_sched_domains();
cpuset_full_unlock();
if (of_cft(of)->private == FILE_MEMLIST)
schedule_flush_migrate_mm();
@@ -3278,6 +3338,7 @@ static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf,
cpuset_full_lock();
if (is_cpuset_online(cs))
retval = update_prstate(cs, val);
+ update_hk_sched_domains();
cpuset_full_unlock();
return retval ?: nbytes;
}
@@ -3452,6 +3513,7 @@ static void cpuset_css_killed(struct cgroup_subsys_state *css)
/* Reset valid partition back to member */
if (is_partition_valid(cs))
update_prstate(cs, PRS_MEMBER);
+ update_hk_sched_domains();
cpuset_full_unlock();
}
@@ -3607,6 +3669,7 @@ int __init cpuset_init(void)
BUG_ON(!alloc_cpumask_var(&top_cpuset.exclusive_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&isolated_cpus, GFP_KERNEL));
+ BUG_ON(!zalloc_cpumask_var(&isolated_hk_cpus, GFP_KERNEL));
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
@@ -3778,6 +3841,7 @@ unlock:
*/
static void cpuset_handle_hotplug(void)
{
+ static DECLARE_WORK(hk_sd_work, hk_sd_workfn);
static cpumask_t new_cpus;
static nodemask_t new_mems;
bool cpus_updated, mems_updated;
@@ -3859,9 +3923,21 @@ static void cpuset_handle_hotplug(void)
rcu_read_unlock();
}
- /* rebuild sched domains if necessary */
- if (force_sd_rebuild)
- rebuild_sched_domains_cpuslocked();
+
+ /*
+ * Queue a work to call housekeeping_update() & rebuild_sched_domains()
+ * There will be a slight delay before the HK_TYPE_DOMAIN housekeeping
+ * cpumask can correctly reflect what is in isolated_cpus.
+ *
+ * We rely on WORK_STRUCT_PENDING_BIT to not requeue a work item that
+ * is still pending. Before the pending bit is cleared, the work data
+ * is copied out and work item dequeued. So it is possible to queue
+ * the work again before the hk_sd_workfn() is invoked to process the
+ * previously queued work. Since hk_sd_workfn() doesn't use the work
+ * item at all, this is not a problem.
+ */
+ if (update_housekeeping || force_sd_rebuild)
+ queue_work(system_unbound_wq, &hk_sd_work);
free_tmpmasks(ptmp);
}
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index f476c63b668c..e89f175e9c2d 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -85,7 +85,7 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
if (is_swiotlb_force_bounce(dev)) {
if (attrs & DMA_ATTR_MMIO)
- goto err_overflow;
+ return DMA_MAPPING_ERROR;
return swiotlb_map(dev, phys, size, dir, attrs);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ac70d68217b6..1f5699b339ec 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4138,7 +4138,8 @@ static int merge_sched_in(struct perf_event *event, void *data)
if (*perf_event_fasync(event))
event->pending_kill = POLL_ERR;
- perf_event_wakeup(event);
+ event->pending_wakeup = 1;
+ irq_work_queue(&event->pending_irq);
} else {
struct perf_cpu_pmu_context *cpc = this_cpc(event->pmu_ctx->pmu);
@@ -7464,28 +7465,28 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
ret = perf_mmap_aux(vma, event, nr_pages);
if (ret)
return ret;
- }
- /*
- * Since pinned accounting is per vm we cannot allow fork() to copy our
- * vma.
- */
- vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
- vma->vm_ops = &perf_mmap_vmops;
+ /*
+ * Since pinned accounting is per vm we cannot allow fork() to copy our
+ * vma.
+ */
+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
+ vma->vm_ops = &perf_mmap_vmops;
- mapped = get_mapped(event, event_mapped);
- if (mapped)
- mapped(event, vma->vm_mm);
+ mapped = get_mapped(event, event_mapped);
+ if (mapped)
+ mapped(event, vma->vm_mm);
- /*
- * Try to map it into the page table. On fail, invoke
- * perf_mmap_close() to undo the above, as the callsite expects
- * full cleanup in this case and therefore does not invoke
- * vmops::close().
- */
- ret = map_range(event->rb, vma);
- if (ret)
- perf_mmap_close(vma);
+ /*
+ * Try to map it into the page table. On fail, invoke
+ * perf_mmap_close() to undo the above, as the callsite expects
+ * full cleanup in this case and therefore does not invoke
+ * vmops::close().
+ */
+ ret = map_range(event->rb, vma);
+ if (ret)
+ perf_mmap_close(vma);
+ }
return ret;
}
@@ -10776,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
+ /*
+ * Entry point from hardware PMI, interrupts should be disabled here.
+ * This serializes us against perf_event_remove_from_context() in
+ * things like perf_event_release_kernel().
+ */
+ lockdep_assert_irqs_disabled();
+
return __perf_event_overflow(event, 1, data, regs);
}
@@ -10852,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
{
struct hw_perf_event *hwc = &event->hw;
+ /*
+ * This is:
+ * - software preempt
+ * - tracepoint preempt
+ * - tp_target_task irq (ctx->lock)
+ * - uprobes preempt/irq
+ * - kprobes preempt/irq
+ * - hw_breakpoint irq
+ *
+ * Any of these are sufficient to hold off RCU and thus ensure @event
+ * exists.
+ */
+ lockdep_assert_preemption_disabled();
local64_add(nr, &event->count);
if (!regs)
@@ -10860,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
if (!is_sampling_event(event))
return;
+ /*
+ * Serialize against event_function_call() IPIs like normal overflow
+ * event handling. Specifically, must not allow
+ * perf_event_release_kernel() -> perf_remove_from_context() to make
+ * progress and 'release' the event from under us.
+ */
+ guard(irqsave)();
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ return;
+
if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
data->period = nr;
return perf_swevent_overflow(event, 1, data, regs);
@@ -11358,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
struct perf_sample_data data;
struct perf_event *event;
+ /*
+ * Per being a tracepoint, this runs with preemption disabled.
+ */
+ lockdep_assert_preemption_disabled();
+
struct perf_raw_record raw = {
.frag = {
.size = entry_size,
@@ -11690,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
+ /*
+ * Exception context, will have interrupts disabled.
+ */
+ lockdep_assert_irqs_disabled();
+
perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
if (!bp->hw.state && !perf_exclude_event(bp, regs))
@@ -12154,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && is_idle_task(current)))
- if (__perf_event_overflow(event, 1, &data, regs))
+ if (perf_event_overflow(event, &data, regs))
ret = HRTIMER_NORESTART;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 8a87021211ae..ede3117fa7d4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -896,11 +896,16 @@ static void synchronize_group_exit(struct task_struct *tsk, long code)
void __noreturn do_exit(long code)
{
struct task_struct *tsk = current;
+ struct kthread *kthread;
int group_dead;
WARN_ON(irqs_disabled());
WARN_ON(tsk->plug);
+ kthread = tsk_is_kthread(tsk);
+ if (unlikely(kthread))
+ kthread_do_exit(kthread, code);
+
kcov_task_exit(tsk);
kmsan_task_exit(tsk);
@@ -1013,6 +1018,7 @@ void __noreturn do_exit(long code)
lockdep_free_task(tsk);
do_task_dead();
}
+EXPORT_SYMBOL(do_exit);
void __noreturn make_task_dead(int signr)
{
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 79e655ea4ca1..ae758150ccb9 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -168,7 +168,7 @@ static bool __report_matches(const struct expect_report *r)
if (!report_available())
return false;
- expect = kmalloc_obj(observed.lines);
+ expect = (typeof(expect))kmalloc_obj(observed.lines);
if (WARN_ON(!expect))
return false;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 20451b624b67..791210daf8b4 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -85,24 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k)
return k->worker_private;
}
-/*
- * Variant of to_kthread() that doesn't assume @p is a kthread.
- *
- * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
- * always remain a kthread. For kthreads p->worker_private always
- * points to a struct kthread. For tasks that are not kthreads
- * p->worker_private is used to point to other things.
- *
- * Return NULL for any task that is not a kthread.
- */
-static inline struct kthread *__to_kthread(struct task_struct *p)
-{
- void *kthread = p->worker_private;
- if (kthread && !(p->flags & PF_KTHREAD))
- kthread = NULL;
- return kthread;
-}
-
void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
struct kthread *kthread = to_kthread(tsk);
@@ -193,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park);
bool kthread_should_stop_or_park(void)
{
- struct kthread *kthread = __to_kthread(current);
+ struct kthread *kthread = tsk_is_kthread(current);
if (!kthread)
return false;
@@ -234,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
*/
void *kthread_func(struct task_struct *task)
{
- struct kthread *kthread = __to_kthread(task);
+ struct kthread *kthread = tsk_is_kthread(task);
if (kthread)
return kthread->threadfn;
return NULL;
@@ -266,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data);
*/
void *kthread_probe_data(struct task_struct *task)
{
- struct kthread *kthread = __to_kthread(task);
+ struct kthread *kthread = tsk_is_kthread(task);
void *data = NULL;
if (kthread)
@@ -309,19 +291,8 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
-/**
- * kthread_exit - Cause the current kthread return @result to kthread_stop().
- * @result: The integer value to return to kthread_stop().
- *
- * While kthread_exit can be called directly, it exists so that
- * functions which do some additional work in non-modular code such as
- * module_put_and_kthread_exit can be implemented.
- *
- * Does not return.
- */
-void __noreturn kthread_exit(long result)
+void kthread_do_exit(struct kthread *kthread, long result)
{
- struct kthread *kthread = to_kthread(current);
kthread->result = result;
if (!list_empty(&kthread->affinity_node)) {
mutex_lock(&kthread_affinity_lock);
@@ -333,9 +304,7 @@ void __noreturn kthread_exit(long result)
kthread->preferred_affinity = NULL;
}
}
- do_exit(0);
}
-EXPORT_SYMBOL(kthread_exit);
/**
* kthread_complete_and_exit - Exit the current kthread.
@@ -683,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu)
bool kthread_is_per_cpu(struct task_struct *p)
{
- struct kthread *kthread = __to_kthread(p);
+ struct kthread *kthread = tsk_is_kthread(p);
if (!kthread)
return false;
diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c
index 8c79058253e1..5acee4174bf0 100644
--- a/kernel/liveupdate/luo_file.c
+++ b/kernel/liveupdate/luo_file.c
@@ -134,9 +134,12 @@ static LIST_HEAD(luo_file_handler_list);
* state that is not preserved. Set by the handler's .preserve()
* callback, and must be freed in the handler's .unpreserve()
* callback.
- * @retrieved: A flag indicating whether a user/kernel in the new kernel has
+ * @retrieve_status: Status code indicating whether a user/kernel in the new kernel has
* successfully called retrieve() on this file. This prevents
- * multiple retrieval attempts.
+ * multiple retrieval attempts. A value of 0 means a retrieve()
+ * has not been attempted, a positive value means the retrieve()
+ * was successful, and a negative value means the retrieve()
+ * failed, and the value is the error code of the call.
* @mutex: A mutex that protects the fields of this specific instance
* (e.g., @retrieved, @file), ensuring that operations like
* retrieving or finishing a file are atomic.
@@ -161,7 +164,7 @@ struct luo_file {
struct file *file;
u64 serialized_data;
void *private_data;
- bool retrieved;
+ int retrieve_status;
struct mutex mutex;
struct list_head list;
u64 token;
@@ -298,7 +301,6 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd)
luo_file->file = file;
luo_file->fh = fh;
luo_file->token = token;
- luo_file->retrieved = false;
mutex_init(&luo_file->mutex);
args.handler = fh;
@@ -577,7 +579,12 @@ int luo_retrieve_file(struct luo_file_set *file_set, u64 token,
return -ENOENT;
guard(mutex)(&luo_file->mutex);
- if (luo_file->retrieved) {
+ if (luo_file->retrieve_status < 0) {
+ /* Retrieve was attempted and it failed. Return the error code. */
+ return luo_file->retrieve_status;
+ }
+
+ if (luo_file->retrieve_status > 0) {
/*
* Someone is asking for this file again, so get a reference
* for them.
@@ -590,16 +597,19 @@ int luo_retrieve_file(struct luo_file_set *file_set, u64 token,
args.handler = luo_file->fh;
args.serialized_data = luo_file->serialized_data;
err = luo_file->fh->ops->retrieve(&args);
- if (!err) {
- luo_file->file = args.file;
-
- /* Get reference so we can keep this file in LUO until finish */
- get_file(luo_file->file);
- *filep = luo_file->file;
- luo_file->retrieved = true;
+ if (err) {
+ /* Keep the error code for later use. */
+ luo_file->retrieve_status = err;
+ return err;
}
- return err;
+ luo_file->file = args.file;
+ /* Get reference so we can keep this file in LUO until finish */
+ get_file(luo_file->file);
+ *filep = luo_file->file;
+ luo_file->retrieve_status = 1;
+
+ return 0;
}
static int luo_file_can_finish_one(struct luo_file_set *file_set,
@@ -615,7 +625,7 @@ static int luo_file_can_finish_one(struct luo_file_set *file_set,
args.handler = luo_file->fh;
args.file = luo_file->file;
args.serialized_data = luo_file->serialized_data;
- args.retrieved = luo_file->retrieved;
+ args.retrieve_status = luo_file->retrieve_status;
can_finish = luo_file->fh->ops->can_finish(&args);
}
@@ -632,7 +642,7 @@ static void luo_file_finish_one(struct luo_file_set *file_set,
args.handler = luo_file->fh;
args.file = luo_file->file;
args.serialized_data = luo_file->serialized_data;
- args.retrieved = luo_file->retrieved;
+ args.retrieve_status = luo_file->retrieve_status;
luo_file->fh->ops->finish(&args);
luo_flb_file_finish(luo_file->fh);
@@ -788,7 +798,6 @@ int luo_file_deserialize(struct luo_file_set *file_set,
luo_file->file = NULL;
luo_file->serialized_data = file_ser[i].data;
luo_file->token = file_ser[i].token;
- luo_file->retrieved = false;
mutex_init(&luo_file->mutex);
list_add_tail(&luo_file->list, &file_set->files_list);
}
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index be74917802ad..43b1bb01fd27 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -169,9 +169,10 @@ config MODVERSIONS
make them incompatible with the kernel you are running. If
unsure, say N.
+if MODVERSIONS
+
choice
prompt "Module versioning implementation"
- depends on MODVERSIONS
help
Select the tool used to calculate symbol versions for modules.
@@ -206,7 +207,7 @@ endchoice
config ASM_MODVERSIONS
bool
- default HAVE_ASM_MODVERSIONS && MODVERSIONS
+ default HAVE_ASM_MODVERSIONS
help
This enables module versioning for exported symbols also from
assembly. This can be enabled only when the target architecture
@@ -214,7 +215,6 @@ config ASM_MODVERSIONS
config EXTENDED_MODVERSIONS
bool "Extended Module Versioning Support"
- depends on MODVERSIONS
help
This enables extended MODVERSIONs support, allowing long symbol
names to be versioned.
@@ -224,7 +224,6 @@ config EXTENDED_MODVERSIONS
config BASIC_MODVERSIONS
bool "Basic Module Versioning Support"
- depends on MODVERSIONS
default y
help
This enables basic MODVERSIONS support, allowing older tools or
@@ -237,6 +236,8 @@ config BASIC_MODVERSIONS
This is enabled by default when MODVERSIONS are enabled.
If unsure, say Y.
+endif # MODVERSIONS
+
config MODULE_SRCVERSION_ALL
bool "Source checksum for all modules"
help
@@ -277,10 +278,11 @@ config MODULE_SIG_FORCE
Reject unsigned modules or signed modules for which we don't have a
key. Without this, such modules will simply taint the kernel.
+if MODULE_SIG || IMA_APPRAISE_MODSIG
+
config MODULE_SIG_ALL
bool "Automatically sign all modules"
default y
- depends on MODULE_SIG || IMA_APPRAISE_MODSIG
help
Sign all modules during make modules_install. Without this option,
modules must be signed manually, using the scripts/sign-file tool.
@@ -290,7 +292,6 @@ comment "Do not forget to sign required modules with scripts/sign-file"
choice
prompt "Hash algorithm to sign modules"
- depends on MODULE_SIG || IMA_APPRAISE_MODSIG
default MODULE_SIG_SHA512
help
This determines which sort of hashing algorithm will be used during
@@ -327,7 +328,6 @@ endchoice
config MODULE_SIG_HASH
string
- depends on MODULE_SIG || IMA_APPRAISE_MODSIG
default "sha256" if MODULE_SIG_SHA256
default "sha384" if MODULE_SIG_SHA384
default "sha512" if MODULE_SIG_SHA512
@@ -335,6 +335,8 @@ config MODULE_SIG_HASH
default "sha3-384" if MODULE_SIG_SHA3_384
default "sha3-512" if MODULE_SIG_SHA3_512
+endif # MODULE_SIG || IMA_APPRAISE_MODSIG
+
config MODULE_COMPRESS
bool "Module compression"
help
@@ -350,9 +352,10 @@ config MODULE_COMPRESS
If unsure, say N.
+if MODULE_COMPRESS
+
choice
prompt "Module compression type"
- depends on MODULE_COMPRESS
help
Choose the supported algorithm for module compression.
@@ -379,7 +382,6 @@ endchoice
config MODULE_COMPRESS_ALL
bool "Automatically compress all modules"
default y
- depends on MODULE_COMPRESS
help
Compress all modules during 'make modules_install'.
@@ -389,7 +391,6 @@ config MODULE_COMPRESS_ALL
config MODULE_DECOMPRESS
bool "Support in-kernel module decompression"
- depends on MODULE_COMPRESS
select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
select XZ_DEC if MODULE_COMPRESS_XZ
select ZSTD_DECOMPRESS if MODULE_COMPRESS_ZSTD
@@ -400,6 +401,8 @@ config MODULE_DECOMPRESS
If unsure, say N.
+endif # MODULE_COMPRESS
+
config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
bool "Allow loading of modules with missing namespace imports"
help
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 2bac4c7cd019..c3ce106c70af 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1568,6 +1568,13 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
break;
default:
+ if (sym[i].st_shndx >= info->hdr->e_shnum) {
+ pr_err("%s: Symbol %s has an invalid section index %u (max %u)\n",
+ mod->name, name, sym[i].st_shndx, info->hdr->e_shnum - 1);
+ ret = -ENOEXEC;
+ break;
+ }
+
/* Divert to percpu allocation if a percpu var. */
if (sym[i].st_shndx == info->index.pcpu)
secbase = (unsigned long)mod_percpu(mod);
@@ -3544,12 +3551,6 @@ static int load_module(struct load_info *info, const char __user *uargs,
mutex_unlock(&module_mutex);
free_module:
mod_stat_bump_invalid(info, flags);
- /* Free lock-classes; relies on the preceding sync_rcu() */
- for_class_mod_mem_type(type, core_data) {
- lockdep_free_key_range(mod->mem[type].base,
- mod->mem[type].size);
- }
-
module_memory_restore_rox(mod);
module_deallocate(mod, info);
free_copy:
diff --git a/kernel/nscommon.c b/kernel/nscommon.c
index bdc3c86231d3..3166c1fd844a 100644
--- a/kernel/nscommon.c
+++ b/kernel/nscommon.c
@@ -309,3 +309,9 @@ void __ns_ref_active_get(struct ns_common *ns)
return;
}
}
+
+bool may_see_all_namespaces(void)
+{
+ return (task_active_pid_ns(current) == &init_pid_ns) &&
+ ns_capable_noaudit(init_pid_ns.user_ns, CAP_SYS_ADMIN);
+}
diff --git a/kernel/nstree.c b/kernel/nstree.c
index f36c59e6951d..6d12e5900ac0 100644
--- a/kernel/nstree.c
+++ b/kernel/nstree.c
@@ -515,32 +515,11 @@ static inline bool __must_check ns_requested(const struct klistns *kls,
static inline bool __must_check may_list_ns(const struct klistns *kls,
struct ns_common *ns)
{
- if (kls->user_ns) {
- if (kls->userns_capable)
- return true;
- } else {
- struct ns_common *owner;
- struct user_namespace *user_ns;
-
- owner = ns_owner(ns);
- if (owner)
- user_ns = to_user_ns(owner);
- else
- user_ns = &init_user_ns;
- if (ns_capable_noaudit(user_ns, CAP_SYS_ADMIN))
- return true;
- }
-
- if (is_current_namespace(ns))
+ if (kls->user_ns && kls->userns_capable)
return true;
-
- if (ns->ns_type != CLONE_NEWUSER)
- return false;
-
- if (ns_capable_noaudit(to_user_ns(ns), CAP_SYS_ADMIN))
+ if (is_current_namespace(ns))
return true;
-
- return false;
+ return may_see_all_namespaces();
}
static inline void ns_put(struct ns_common *ns)
@@ -600,7 +579,7 @@ static ssize_t do_listns_userns(struct klistns *kls)
ret = 0;
head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head;
- kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN);
+ kls->userns_capable = may_see_all_namespaces();
rcu_read_lock();
diff --git a/kernel/rseq.c b/kernel/rseq.c
index b0973d19f366..38d3ef540760 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -80,6 +80,7 @@
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/rseq.h>
#include <asm/ptrace.h>
#define CREATE_TRACE_POINTS
@@ -449,13 +450,14 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
* auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq
* size, the required alignment is the original struct rseq alignment.
*
- * In order to be valid, rseq_len is either the original rseq size, or
- * large enough to contain all supported fields, as communicated to
+ * The rseq_len is required to be greater or equal to the original rseq
+ * size. In order to be valid, rseq_len is either the original rseq size,
+ * or large enough to contain all supported fields, as communicated to
* user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE.
*/
if (rseq_len < ORIG_RSEQ_SIZE ||
(rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) ||
- (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
+ (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) ||
rseq_len < offsetof(struct rseq, end))))
return -EINVAL;
if (!access_ok(rseq, rseq_len))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 759777694c78..b7f77c165a6e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6830,6 +6830,7 @@ static void __sched notrace __schedule(int sched_mode)
/* SCX must consult the BPF scheduler to tell if rq is empty */
if (!rq->nr_running && !scx_enabled()) {
next = prev;
+ rq->next_class = &idle_sched_class;
goto picked;
}
} else if (!preempt && prev_state) {
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 62b1f3ac5630..1594987d637b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -976,8 +976,12 @@ static bool scx_dsq_priq_less(struct rb_node *node_a,
static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta)
{
- /* scx_bpf_dsq_nr_queued() reads ->nr without locking, use WRITE_ONCE() */
- WRITE_ONCE(dsq->nr, dsq->nr + delta);
+ /*
+ * scx_bpf_dsq_nr_queued() reads ->nr without locking. Use READ_ONCE()
+ * on the read side and WRITE_ONCE() on the write side to properly
+ * annotate the concurrent lockless access and avoid KCSAN warnings.
+ */
+ WRITE_ONCE(dsq->nr, READ_ONCE(dsq->nr) + delta);
}
static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p)
@@ -2460,7 +2464,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
/* see kick_cpus_irq_workfn() */
smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
- rq->next_class = &ext_sched_class;
+ rq_modified_begin(rq, &ext_sched_class);
rq_unpin_lock(rq, rf);
balance_one(rq, prev);
@@ -2475,7 +2479,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
* If @force_scx is true, always try to pick a SCHED_EXT task,
* regardless of any higher-priority sched classes activity.
*/
- if (!force_scx && sched_class_above(rq->next_class, &ext_sched_class))
+ if (!force_scx && rq_modified_above(rq, &ext_sched_class))
return RETRY_TASK;
keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP;
@@ -2735,7 +2739,7 @@ static bool check_rq_for_timeouts(struct rq *rq)
unsigned long last_runnable = p->scx.runnable_at;
if (unlikely(time_after(jiffies,
- last_runnable + scx_watchdog_timeout))) {
+ last_runnable + READ_ONCE(scx_watchdog_timeout)))) {
u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable);
scx_exit(sch, SCX_EXIT_ERROR_STALL, 0,
@@ -2763,7 +2767,7 @@ static void scx_watchdog_workfn(struct work_struct *work)
cond_resched();
}
queue_delayed_work(system_unbound_wq, to_delayed_work(work),
- scx_watchdog_timeout / 2);
+ READ_ONCE(scx_watchdog_timeout) / 2);
}
void scx_tick(struct rq *rq)
@@ -3585,7 +3589,6 @@ static int scx_cgroup_init(struct scx_sched *sch)
ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init, NULL,
css->cgroup, &args);
if (ret) {
- css_put(css);
scx_error(sch, "ops.cgroup_init() failed (%d)", ret);
return ret;
}
@@ -3708,7 +3711,9 @@ static void scx_kobj_release(struct kobject *kobj)
static ssize_t scx_attr_ops_show(struct kobject *kobj,
struct kobj_attribute *ka, char *buf)
{
- return sysfs_emit(buf, "%s\n", scx_root->ops.name);
+ struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj);
+
+ return sysfs_emit(buf, "%s\n", sch->ops.name);
}
SCX_ATTR(ops);
@@ -3752,7 +3757,9 @@ static const struct kobj_type scx_ktype = {
static int scx_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
- return add_uevent_var(env, "SCXOPS=%s", scx_root->ops.name);
+ const struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj);
+
+ return add_uevent_var(env, "SCXOPS=%s", sch->ops.name);
}
static const struct kset_uevent_ops scx_uevent_ops = {
@@ -4423,10 +4430,19 @@ done:
scx_bypass(false);
}
+/*
+ * Claim the exit on @sch. The caller must ensure that the helper kthread work
+ * is kicked before the current task can be preempted. Once exit_kind is
+ * claimed, scx_error() can no longer trigger, so if the current task gets
+ * preempted and the BPF scheduler fails to schedule it back, the helper work
+ * will never be kicked and the whole system can wedge.
+ */
static bool scx_claim_exit(struct scx_sched *sch, enum scx_exit_kind kind)
{
int none = SCX_EXIT_NONE;
+ lockdep_assert_preemption_disabled();
+
if (!atomic_try_cmpxchg(&sch->exit_kind, &none, kind))
return false;
@@ -4449,6 +4465,7 @@ static void scx_disable(enum scx_exit_kind kind)
rcu_read_lock();
sch = rcu_dereference(scx_root);
if (sch) {
+ guard(preempt)();
scx_claim_exit(sch, kind);
kthread_queue_work(sch->helper, &sch->disable_work);
}
@@ -4771,6 +4788,8 @@ static bool scx_vexit(struct scx_sched *sch,
{
struct scx_exit_info *ei = sch->exit_info;
+ guard(preempt)();
+
if (!scx_claim_exit(sch, kind))
return false;
@@ -4955,20 +4974,30 @@ static int validate_ops(struct scx_sched *sch, const struct sched_ext_ops *ops)
return 0;
}
-static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
+/*
+ * scx_enable() is offloaded to a dedicated system-wide RT kthread to avoid
+ * starvation. During the READY -> ENABLED task switching loop, the calling
+ * thread's sched_class gets switched from fair to ext. As fair has higher
+ * priority than ext, the calling thread can be indefinitely starved under
+ * fair-class saturation, leading to a system hang.
+ */
+struct scx_enable_cmd {
+ struct kthread_work work;
+ struct sched_ext_ops *ops;
+ int ret;
+};
+
+static void scx_enable_workfn(struct kthread_work *work)
{
+ struct scx_enable_cmd *cmd =
+ container_of(work, struct scx_enable_cmd, work);
+ struct sched_ext_ops *ops = cmd->ops;
struct scx_sched *sch;
struct scx_task_iter sti;
struct task_struct *p;
unsigned long timeout;
int i, cpu, ret;
- if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
- cpu_possible_mask)) {
- pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
- return -EINVAL;
- }
-
mutex_lock(&scx_enable_mutex);
if (scx_enable_state() != SCX_DISABLED) {
@@ -5060,7 +5089,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
WRITE_ONCE(scx_watchdog_timeout, timeout);
WRITE_ONCE(scx_watchdog_timestamp, jiffies);
queue_delayed_work(system_unbound_wq, &scx_watchdog_work,
- scx_watchdog_timeout / 2);
+ READ_ONCE(scx_watchdog_timeout) / 2);
/*
* Once __scx_enabled is set, %current can be switched to SCX anytime.
@@ -5185,13 +5214,15 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
atomic_long_inc(&scx_enable_seq);
- return 0;
+ cmd->ret = 0;
+ return;
err_free_ksyncs:
free_kick_syncs();
err_unlock:
mutex_unlock(&scx_enable_mutex);
- return ret;
+ cmd->ret = ret;
+ return;
err_disable_unlock_all:
scx_cgroup_unlock();
@@ -5210,7 +5241,41 @@ err_disable:
*/
scx_error(sch, "scx_enable() failed (%d)", ret);
kthread_flush_work(&sch->disable_work);
- return 0;
+ cmd->ret = 0;
+}
+
+static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
+{
+ static struct kthread_worker *helper;
+ static DEFINE_MUTEX(helper_mutex);
+ struct scx_enable_cmd cmd;
+
+ if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
+ cpu_possible_mask)) {
+ pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
+ return -EINVAL;
+ }
+
+ if (!READ_ONCE(helper)) {
+ mutex_lock(&helper_mutex);
+ if (!helper) {
+ helper = kthread_run_worker(0, "scx_enable_helper");
+ if (IS_ERR_OR_NULL(helper)) {
+ helper = NULL;
+ mutex_unlock(&helper_mutex);
+ return -ENOMEM;
+ }
+ sched_set_fifo(helper->task);
+ }
+ mutex_unlock(&helper_mutex);
+ }
+
+ kthread_init_work(&cmd.work, scx_enable_workfn);
+ cmd.ops = ops;
+
+ kthread_queue_work(READ_ONCE(helper), &cmd.work);
+ kthread_flush_work(&cmd.work);
+ return cmd.ret;
}
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index c5a3b0bac7c3..ba298ac3ce6c 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -663,9 +663,8 @@ void scx_idle_init_masks(void)
BUG_ON(!alloc_cpumask_var(&scx_idle_global_masks.cpu, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&scx_idle_global_masks.smt, GFP_KERNEL));
- /* Allocate per-node idle cpumasks */
- scx_idle_node_masks = kzalloc_objs(*scx_idle_node_masks,
- num_possible_nodes());
+ /* Allocate per-node idle cpumasks (use nr_node_ids for non-contiguous NUMA nodes) */
+ scx_idle_node_masks = kzalloc_objs(*scx_idle_node_masks, nr_node_ids);
BUG_ON(!scx_idle_node_masks);
for_each_node(i) {
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 386c677e4c9a..11ebb744d893 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -74,7 +74,7 @@ enum scx_exit_flags {
* info communication. The following flag indicates whether ops.init()
* finished successfully.
*/
- SCX_EFLAG_INITIALIZED,
+ SCX_EFLAG_INITIALIZED = 1LLU << 0,
};
/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eea99ec01a3f..bf948db905ed 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -589,6 +589,21 @@ static inline bool entity_before(const struct sched_entity *a,
return vruntime_cmp(a->deadline, "<", b->deadline);
}
+/*
+ * Per avg_vruntime() below, cfs_rq::zero_vruntime is only slightly stale
+ * and this value should be no more than two lag bounds. Which puts it in the
+ * general order of:
+ *
+ * (slice + TICK_NSEC) << NICE_0_LOAD_SHIFT
+ *
+ * which is around 44 bits in size (on 64bit); that is 20 for
+ * NICE_0_LOAD_SHIFT, another 20 for NSEC_PER_MSEC and then a handful for
+ * however many msec the actual slice+tick ends up begin.
+ *
+ * (disregarding the actual divide-by-weight part makes for the worst case
+ * weight of 2, which nicely cancels vs the fuzz in zero_vruntime not actually
+ * being the zero-lag point).
+ */
static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
return vruntime_op(se->vruntime, "-", cfs_rq->zero_vruntime);
@@ -676,41 +691,65 @@ sum_w_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
}
static inline
-void sum_w_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void update_zero_vruntime(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> sum_w_vruntime' = sum_runtime - d*sum_weight
+ * v' = v + d ==> sum_w_vruntime' = sum_w_vruntime - d*sum_weight
*/
cfs_rq->sum_w_vruntime -= cfs_rq->sum_weight * delta;
+ cfs_rq->zero_vruntime += delta;
}
/*
- * Specifically: avg_runtime() + 0 must result in entity_eligible() := true
+ * Specifically: avg_vruntime() + 0 must result in entity_eligible() := true
* For this to be so, the result of this function must have a left bias.
+ *
+ * Called in:
+ * - place_entity() -- before enqueue
+ * - update_entity_lag() -- before dequeue
+ * - entity_tick()
+ *
+ * This means it is one entry 'behind' but that puts it close enough to where
+ * the bound on entity_key() is at most two lag bounds.
*/
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->sum_w_vruntime;
- long load = cfs_rq->sum_weight;
+ long weight = cfs_rq->sum_weight;
+ s64 delta = 0;
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
+ if (curr && !curr->on_rq)
+ curr = NULL;
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
+ if (weight) {
+ s64 runtime = cfs_rq->sum_w_vruntime;
+
+ if (curr) {
+ unsigned long w = scale_load_down(curr->load.weight);
+
+ runtime += entity_key(cfs_rq, curr) * w;
+ weight += w;
+ }
- if (load) {
/* sign flips effective floor / ceiling */
- if (avg < 0)
- avg -= (load - 1);
- avg = div_s64(avg, load);
+ if (runtime < 0)
+ runtime -= (weight - 1);
+
+ delta = div_s64(runtime, weight);
+ } else if (curr) {
+ /*
+ * When there is but one element, it is the average.
+ */
+ delta = curr->vruntime - cfs_rq->zero_vruntime;
}
- return cfs_rq->zero_vruntime + avg;
+ update_zero_vruntime(cfs_rq, delta);
+
+ return cfs_rq->zero_vruntime;
}
+static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq);
+
/*
* lag_i = S - s_i = w_i * (V - v_i)
*
@@ -724,17 +763,16 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
* EEVDF gives the following limit for a steady state system:
*
* -r_max < lag < max(r_max, q)
- *
- * XXX could add max_slice to the augmented data to track this.
*/
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
+ u64 max_slice = cfs_rq_max_slice(cfs_rq) + TICK_NSEC;
s64 vlag, limit;
WARN_ON_ONCE(!se->on_rq);
vlag = avg_vruntime(cfs_rq) - se->vruntime;
- limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+ limit = calc_delta_fair(max_slice, se);
se->vlag = clamp(vlag, -limit, limit);
}
@@ -777,16 +815,6 @@ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
return vruntime_eligible(cfs_rq, se->vruntime);
}
-static void update_zero_vruntime(struct cfs_rq *cfs_rq)
-{
- u64 vruntime = avg_vruntime(cfs_rq);
- s64 delta = vruntime_op(vruntime, "-", cfs_rq->zero_vruntime);
-
- sum_w_vruntime_update(cfs_rq, delta);
-
- cfs_rq->zero_vruntime = vruntime;
-}
-
static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq)
{
struct sched_entity *root = __pick_root_entity(cfs_rq);
@@ -802,6 +830,21 @@ static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq)
return min_slice;
}
+static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq)
+{
+ struct sched_entity *root = __pick_root_entity(cfs_rq);
+ struct sched_entity *curr = cfs_rq->curr;
+ u64 max_slice = 0ULL;
+
+ if (curr && curr->on_rq)
+ max_slice = curr->slice;
+
+ if (root)
+ max_slice = max(max_slice, root->max_slice);
+
+ return max_slice;
+}
+
static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
{
return entity_before(__node_2_se(a), __node_2_se(b));
@@ -826,6 +869,15 @@ static inline void __min_slice_update(struct sched_entity *se, struct rb_node *n
}
}
+static inline void __max_slice_update(struct sched_entity *se, struct rb_node *node)
+{
+ if (node) {
+ struct sched_entity *rse = __node_2_se(node);
+ if (rse->max_slice > se->max_slice)
+ se->max_slice = rse->max_slice;
+ }
+}
+
/*
* se->min_vruntime = min(se->vruntime, {left,right}->min_vruntime)
*/
@@ -833,6 +885,7 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit)
{
u64 old_min_vruntime = se->min_vruntime;
u64 old_min_slice = se->min_slice;
+ u64 old_max_slice = se->max_slice;
struct rb_node *node = &se->run_node;
se->min_vruntime = se->vruntime;
@@ -843,8 +896,13 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit)
__min_slice_update(se, node->rb_right);
__min_slice_update(se, node->rb_left);
+ se->max_slice = se->slice;
+ __max_slice_update(se, node->rb_right);
+ __max_slice_update(se, node->rb_left);
+
return se->min_vruntime == old_min_vruntime &&
- se->min_slice == old_min_slice;
+ se->min_slice == old_min_slice &&
+ se->max_slice == old_max_slice;
}
RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
@@ -856,7 +914,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
sum_w_vruntime_add(cfs_rq, se);
- update_zero_vruntime(cfs_rq);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -868,7 +925,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
sum_w_vruntime_sub(cfs_rq, se);
- update_zero_vruntime(cfs_rq);
}
struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -3790,6 +3846,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
bool curr = cfs_rq->curr == se;
+ bool rel_vprot = false;
+ u64 vprot;
if (se->on_rq) {
/* commit outstanding execution time */
@@ -3797,6 +3855,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
update_entity_lag(cfs_rq, se);
se->deadline -= se->vruntime;
se->rel_deadline = 1;
+ if (curr && protect_slice(se)) {
+ vprot = se->vprot - se->vruntime;
+ rel_vprot = true;
+ }
+
cfs_rq->nr_queued--;
if (!curr)
__dequeue_entity(cfs_rq, se);
@@ -3812,6 +3875,9 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
if (se->rel_deadline)
se->deadline = div_s64(se->deadline * se->load.weight, weight);
+ if (rel_vprot)
+ vprot = div_s64(vprot * se->load.weight, weight);
+
update_load_set(&se->load, weight);
do {
@@ -3823,6 +3889,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
enqueue_load_avg(cfs_rq, se);
if (se->on_rq) {
place_entity(cfs_rq, se, 0);
+ if (rel_vprot)
+ se->vprot = se->vruntime + vprot;
update_load_add(&cfs_rq->load, se->load.weight);
if (!curr)
__enqueue_entity(cfs_rq, se);
@@ -5420,7 +5488,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
}
static void
-set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, bool first)
{
clear_buddies(cfs_rq, se);
@@ -5435,7 +5503,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
__dequeue_entity(cfs_rq, se);
update_load_avg(cfs_rq, se, UPDATE_TG);
- set_protect_slice(cfs_rq, se);
+ if (first)
+ set_protect_slice(cfs_rq, se);
}
update_stats_curr_start(cfs_rq, se);
@@ -5524,6 +5593,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
update_load_avg(cfs_rq, curr, UPDATE_TG);
update_cfs_group(curr);
+ /*
+ * Pulls along cfs_rq::zero_vruntime.
+ */
+ avg_vruntime(cfs_rq);
+
#ifdef CONFIG_SCHED_HRTICK
/*
* queued ticks are scheduled to match the slice, so don't bother
@@ -8948,13 +9022,13 @@ again:
pse = parent_entity(pse);
}
if (se_depth >= pse_depth) {
- set_next_entity(cfs_rq_of(se), se);
+ set_next_entity(cfs_rq_of(se), se, true);
se = parent_entity(se);
}
}
put_prev_entity(cfs_rq, pse);
- set_next_entity(cfs_rq, se);
+ set_next_entity(cfs_rq, se, true);
__set_next_task_fair(rq, p, true);
}
@@ -12908,7 +12982,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
t0 = sched_clock_cpu(this_cpu);
__sched_balance_update_blocked_averages(this_rq);
- this_rq->next_class = &fair_sched_class;
+ rq_modified_begin(this_rq, &fair_sched_class);
raw_spin_rq_unlock(this_rq);
for_each_domain(this_cpu, sd) {
@@ -12975,7 +13049,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
pulled_task = 1;
/* If a higher prio class was modified, restart the pick */
- if (sched_class_above(this_rq->next_class, &fair_sched_class))
+ if (rq_modified_above(this_rq, &fair_sched_class))
pulled_task = -1;
out:
@@ -13568,7 +13642,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- set_next_entity(cfs_rq, se);
+ set_next_entity(cfs_rq, se, first);
/* ensure bandwidth has been allocated on our new cfs_rq */
account_cfs_rq_runtime(cfs_rq, 0);
}
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 3b725d39c06e..ef152d401fe2 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -123,8 +123,6 @@ int housekeeping_update(struct cpumask *isol_mask)
struct cpumask *trial, *old = NULL;
int err;
- lockdep_assert_cpus_held();
-
trial = kmalloc(cpumask_size(), GFP_KERNEL);
if (!trial)
return -ENOMEM;
@@ -136,7 +134,7 @@ int housekeeping_update(struct cpumask *isol_mask)
}
if (!housekeeping.flags)
- static_branch_enable_cpuslocked(&housekeeping_overridden);
+ static_branch_enable(&housekeeping_overridden);
if (housekeeping.flags & HK_FLAG_DOMAIN)
old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b82fb70a9d54..43bbf0693cca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2748,6 +2748,17 @@ static inline const struct sched_class *next_active_class(const struct sched_cla
#define sched_class_above(_a, _b) ((_a) < (_b))
+static inline void rq_modified_begin(struct rq *rq, const struct sched_class *class)
+{
+ if (sched_class_above(rq->next_class, class))
+ rq->next_class = class;
+}
+
+static inline bool rq_modified_above(struct rq *rq, const struct sched_class *class)
+{
+ return sched_class_above(rq->next_class, class);
+}
+
static inline bool sched_stop_runnable(struct rq *rq)
{
return rq->stop && task_on_rq_queued(rq->stop);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a5c7d15fce72..9daf8c5d9687 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -256,8 +256,6 @@ EXPORT_SYMBOL(proc_dointvec_jiffies);
int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int dir,
void *buffer, size_t *lenp, loff_t *ppos)
{
- if (SYSCTL_USER_TO_KERN(dir) && USER_HZ < HZ)
- return -EINVAL;
return proc_dointvec_conv(table, dir, buffer, lenp, ppos,
do_proc_int_conv_userhz_jiffies);
}
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 0ba8e3c50d62..36fd2313ae7e 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -365,20 +365,16 @@ SYSCALL_DEFINE1(adjtimex_time32, struct old_timex32 __user *, utp)
}
#endif
+#if HZ > MSEC_PER_SEC || (MSEC_PER_SEC % HZ)
/**
* jiffies_to_msecs - Convert jiffies to milliseconds
* @j: jiffies value
*
- * Avoid unnecessary multiplications/divisions in the
- * two most common HZ cases.
- *
* Return: milliseconds value
*/
unsigned int jiffies_to_msecs(const unsigned long j)
{
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- return (MSEC_PER_SEC / HZ) * j;
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+#if HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
#else
# if BITS_PER_LONG == 32
@@ -390,7 +386,9 @@ unsigned int jiffies_to_msecs(const unsigned long j)
#endif
}
EXPORT_SYMBOL(jiffies_to_msecs);
+#endif
+#if (USEC_PER_SEC % HZ)
/**
* jiffies_to_usecs - Convert jiffies to microseconds
* @j: jiffies value
@@ -405,17 +403,14 @@ unsigned int jiffies_to_usecs(const unsigned long j)
*/
BUILD_BUG_ON(HZ > USEC_PER_SEC);
-#if !(USEC_PER_SEC % HZ)
- return (USEC_PER_SEC / HZ) * j;
-#else
-# if BITS_PER_LONG == 32
+#if BITS_PER_LONG == 32
return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32;
-# else
+#else
return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN;
-# endif
#endif
}
EXPORT_SYMBOL(jiffies_to_usecs);
+#endif
/**
* mktime64 - Converts date to seconds.
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index c1ed0d5e8de6..155eeaea4113 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -1559,8 +1559,6 @@ int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask)
cpumask_var_t cpumask __free(free_cpumask_var) = CPUMASK_VAR_NULL;
int cpu;
- lockdep_assert_cpus_held();
-
if (!works)
return -ENOMEM;
if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
@@ -1570,6 +1568,7 @@ int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask)
* First set previously isolated CPUs as available (unisolate).
* This cpumask contains only CPUs that switched to available now.
*/
+ guard(cpus_read_lock)();
cpumask_andnot(cpumask, cpu_online_mask, exclude_cpumask);
cpumask_andnot(cpumask, cpumask, tmigr_available_cpumask);
@@ -1626,7 +1625,6 @@ static int __init tmigr_init_isolation(void)
cpumask_andnot(cpumask, cpu_possible_mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
/* Protect against RCU torture hotplug testing */
- guard(cpus_read_lock)();
return tmigr_isolated_exclude_cpumask(cpumask);
}
late_initcall(tmigr_init_isolation);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9bc0dfd235af..0b040a417442 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2454,8 +2454,10 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
struct seq_file *seq)
{
struct bpf_kprobe_multi_link *kmulti_link;
+ bool has_cookies;
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
+ has_cookies = !!kmulti_link->cookies;
seq_printf(seq,
"kprobe_cnt:\t%u\n"
@@ -2467,7 +2469,7 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
for (int i = 0; i < kmulti_link->cnt; i++) {
seq_printf(seq,
"%llu\t %pS\n",
- kmulti_link->cookies[i],
+ has_cookies ? kmulti_link->cookies[i] : 0,
(void *)kmulti_link->addrs[i]);
}
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 827fb9a0bf0d..2f72af0357e5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -8611,6 +8611,7 @@ ftrace_pid_follow_sched_process_fork(void *data,
struct trace_pid_list *pid_list;
struct trace_array *tr = data;
+ guard(preempt)();
pid_list = rcu_dereference_sched(tr->function_pids);
trace_filter_add_remove_task(pid_list, self, task);
@@ -8624,6 +8625,7 @@ ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task)
struct trace_pid_list *pid_list;
struct trace_array *tr = data;
+ guard(preempt)();
pid_list = rcu_dereference_sched(tr->function_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f16f053ef77d..17d0ea0cc3e6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -7310,6 +7310,27 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
return err;
}
+/*
+ * This is called when a VMA is duplicated (e.g., on fork()) to increment
+ * the user_mapped counter without remapping pages.
+ */
+void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (WARN_ON(!cpumask_test_cpu(cpu, buffer->cpumask)))
+ return;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ guard(mutex)(&cpu_buffer->mapping_lock);
+
+ if (cpu_buffer->user_mapped)
+ __rb_inc_dec_mapped(cpu_buffer, true);
+ else
+ WARN(1, "Unexpected buffer stat, it should be mapped");
+}
+
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 23de3719f495..1e7c032a72d2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8213,6 +8213,18 @@ static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
static inline void put_snapshot_map(struct trace_array *tr) { }
#endif
+/*
+ * This is called when a VMA is duplicated (e.g., on fork()) to increment
+ * the user_mapped counter without remapping pages.
+ */
+static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
+{
+ struct ftrace_buffer_info *info = vma->vm_file->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
+}
+
static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
{
struct ftrace_buffer_info *info = vma->vm_file->private_data;
@@ -8232,6 +8244,7 @@ static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long a
}
static const struct vm_operations_struct tracing_buffers_vmops = {
+ .open = tracing_buffers_mmap_open,
.close = tracing_buffers_mmap_close,
.may_split = tracing_buffers_may_split,
};
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9928da636c9d..b7343fdfd7b0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1039,6 +1039,7 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
struct trace_pid_list *pid_list;
struct trace_array *tr = data;
+ guard(preempt)();
pid_list = rcu_dereference_raw(tr->filtered_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
@@ -1054,6 +1055,7 @@ event_filter_pid_sched_process_fork(void *data,
struct trace_pid_list *pid_list;
struct trace_array *tr = data;
+ guard(preempt)();
pid_list = rcu_dereference_sched(tr->filtered_pids);
trace_filter_add_remove_task(pid_list, self, task);
@@ -4668,26 +4670,22 @@ static __init int event_trace_memsetup(void)
return 0;
}
-__init void
-early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
+/*
+ * Helper function to enable or disable a comma-separated list of events
+ * from the bootup buffer.
+ */
+static __init void __early_set_events(struct trace_array *tr, char *buf, bool enable)
{
char *token;
- int ret;
-
- while (true) {
- token = strsep(&buf, ",");
-
- if (!token)
- break;
+ while ((token = strsep(&buf, ","))) {
if (*token) {
- /* Restarting syscalls requires that we stop them first */
- if (disable_first)
+ if (enable) {
+ if (ftrace_set_clr_event(tr, token, 1))
+ pr_warn("Failed to enable trace event: %s\n", token);
+ } else {
ftrace_set_clr_event(tr, token, 0);
-
- ret = ftrace_set_clr_event(tr, token, 1);
- if (ret)
- pr_warn("Failed to enable trace event: %s\n", token);
+ }
}
/* Put back the comma to allow this to be called again */
@@ -4696,6 +4694,32 @@ early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
}
}
+/**
+ * early_enable_events - enable events from the bootup buffer
+ * @tr: The trace array to enable the events in
+ * @buf: The buffer containing the comma separated list of events
+ * @disable_first: If true, disable all events in @buf before enabling them
+ *
+ * This function enables events from the bootup buffer. If @disable_first
+ * is true, it will first disable all events in the buffer before enabling
+ * them.
+ *
+ * For syscall events, which rely on a global refcount to register the
+ * SYSCALL_WORK_SYSCALL_TRACEPOINT flag (especially for pid 1), we must
+ * ensure the refcount hits zero before re-enabling them. A simple
+ * "disable then enable" per-event is not enough if multiple syscalls are
+ * used, as the refcount will stay above zero. Thus, we need a two-phase
+ * approach: disable all, then enable all.
+ */
+__init void
+early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
+{
+ if (disable_first)
+ __early_set_events(tr, buf, false);
+
+ __early_set_events(tr, buf, true);
+}
+
static __init int event_trace_enable(void)
{
struct trace_array *tr = top_trace_array();
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 3d8239fee004..0d2d3a2ea7dd 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -400,14 +400,19 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops,
struct ftrace_regs *fregs)
{
+ unsigned long *task_var = fgraph_get_task_var(gops);
struct fgraph_times *ftimes;
struct trace_array *tr;
+ unsigned int trace_ctx;
+ u64 calltime, rettime;
int size;
+ rettime = trace_clock_local();
+
ftrace_graph_addr_finish(gops, trace);
- if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
- trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+ if (*task_var & TRACE_GRAPH_NOTRACE) {
+ *task_var &= ~TRACE_GRAPH_NOTRACE;
return;
}
@@ -418,11 +423,13 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
tr = gops->private;
handle_nosleeptime(tr, trace, ftimes, size);
- if (tracing_thresh &&
- (trace_clock_local() - ftimes->calltime < tracing_thresh))
+ calltime = ftimes->calltime;
+
+ if (tracing_thresh && (rettime - calltime < tracing_thresh))
return;
- else
- trace_graph_return(trace, gops, fregs);
+
+ trace_ctx = tracing_gen_ctx();
+ __trace_graph_return(tr, trace, trace_ctx, calltime, rettime);
}
static struct fgraph_ops funcgraph_ops = {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 318df4c75454..93f356d2b3d9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -630,7 +630,7 @@ config DEBUG_FORCE_WEAK_PER_CPU
config WARN_CONTEXT_ANALYSIS
bool "Compiler context-analysis warnings"
- depends on CC_IS_CLANG && CLANG_VERSION >= 220000
+ depends on CC_IS_CLANG && CLANG_VERSION >= 220100
# Branch profiling re-defines "if", which messes with the compiler's
# ability to analyze __cond_acquires(..), resulting in false positives.
depends on !TRACE_BRANCH_PROFILING
@@ -641,7 +641,7 @@ config WARN_CONTEXT_ANALYSIS
and releasing user-definable "context locks".
Clang's name of the feature is "Thread Safety Analysis". Requires
- Clang 22 or later.
+ Clang 22.1.0 or later.
Produces warnings by default. Select CONFIG_WERROR if you wish to
turn these warnings into errors.
@@ -760,6 +760,7 @@ source "mm/Kconfig.debug"
config DEBUG_OBJECTS
bool "Debug object operations"
+ depends on PREEMPT_COUNT || !DEFERRED_STRUCT_PAGE_INIT
depends on DEBUG_KERNEL
help
If you say Y here, additional code will be inserted into the
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 89a1d6745dc2..12f50de85b62 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -398,9 +398,26 @@ static void fill_pool(void)
atomic_inc(&cpus_allocating);
while (pool_should_refill(&pool_global)) {
+ gfp_t gfp = __GFP_HIGH | __GFP_NOWARN;
HLIST_HEAD(head);
- if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN))
+ /*
+ * Allow reclaim only in preemptible context and during
+ * early boot. If not preemptible, the caller might hold
+ * locks causing a deadlock in the allocator.
+ *
+ * If the reclaim flag is not set during early boot then
+ * allocations, which happen before deferred page
+ * initialization has completed, will fail.
+ *
+ * In preemptible context the flag is harmless and not a
+ * performance issue as that's usually invoked from slow
+ * path initialization context.
+ */
+ if (preemptible() || system_state < SYSTEM_SCHEDULING)
+ gfp |= __GFP_KSWAPD_RECLAIM;
+
+ if (!kmem_alloc_batch(&head, obj_cache, gfp))
break;
guard(raw_spinlock_irqsave)(&pool_lock);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 01eba1a547d4..adfc52fee9dc 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1252,6 +1252,9 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
{
int err;
+ if (!is_power_of_2(src->min_region_sz))
+ return -EINVAL;
+
err = damon_commit_schemes(dst, src);
if (err)
return err;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d4ca8cfd7f9d..8e2746ea74ad 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -94,6 +94,9 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
inode = file_inode(vma->vm_file);
+ if (IS_ANON_FILE(inode))
+ return false;
+
return !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
}
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index b4ea3262c925..7393957f9a20 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -13,6 +13,7 @@
#include <linux/hash.h>
#include <linux/irq_work.h>
#include <linux/jhash.h>
+#include <linux/kasan-enabled.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
@@ -917,6 +918,20 @@ void __init kfence_alloc_pool_and_metadata(void)
return;
/*
+ * If KASAN hardware tags are enabled, disable KFENCE, because it
+ * does not support MTE yet.
+ */
+ if (kasan_hw_tags_enabled()) {
+ pr_info("disabled as KASAN HW tags are enabled\n");
+ if (__kfence_pool) {
+ memblock_free(__kfence_pool, KFENCE_POOL_SIZE);
+ __kfence_pool = NULL;
+ }
+ kfence_sample_interval = 0;
+ return;
+ }
+
+ /*
* If the pool has already been initialized by arch, there is no need to
* re-allocate the memory pool.
*/
@@ -989,14 +1004,14 @@ static int kfence_init_late(void)
#ifdef CONFIG_CONTIG_ALLOC
struct page *pages;
- pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node,
- NULL);
+ pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL | __GFP_SKIP_KASAN,
+ first_online_node, NULL);
if (!pages)
return -ENOMEM;
__kfence_pool = page_to_virt(pages);
- pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node,
- NULL);
+ pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL | __GFP_SKIP_KASAN,
+ first_online_node, NULL);
if (pages)
kfence_metadata_init = page_to_virt(pages);
#else
@@ -1006,11 +1021,13 @@ static int kfence_init_late(void)
return -EINVAL;
}
- __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
+ __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE,
+ GFP_KERNEL | __GFP_SKIP_KASAN);
if (!__kfence_pool)
return -ENOMEM;
- kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL);
+ kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE,
+ GFP_KERNEL | __GFP_SKIP_KASAN);
#endif
if (!kfence_metadata_init)
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
index 5c17da3880c5..e485b828d173 100644
--- a/mm/memfd_luo.c
+++ b/mm/memfd_luo.c
@@ -326,7 +326,12 @@ static void memfd_luo_finish(struct liveupdate_file_op_args *args)
struct memfd_luo_folio_ser *folios_ser;
struct memfd_luo_ser *ser;
- if (args->retrieved)
+ /*
+ * If retrieve was successful, nothing to do. If it failed, retrieve()
+ * already cleaned up everything it could. So nothing to do there
+ * either. Only need to clean up when retrieve was not called.
+ */
+ if (args->retrieve_status)
return;
ser = phys_to_virt(args->serialized_data);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 61d983d23f55..df34797691bd 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1896,7 +1896,11 @@ static void __init free_area_init(void)
for_each_node(nid) {
pg_data_t *pgdat;
- if (!node_online(nid))
+ /*
+ * If an architecture has not allocated node data for
+ * this node, presume the node is memoryless or offline.
+ */
+ if (!NODE_DATA(nid))
alloc_offline_node_data(nid);
pgdat = NODE_DATA(nid);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fcc32737f451..2d4b6f1a554e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6928,7 +6928,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
{
const gfp_t reclaim_mask = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
const gfp_t action_mask = __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN |
- __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO;
+ __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO |
+ __GFP_SKIP_KASAN;
const gfp_t cc_action_mask = __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
/*
diff --git a/mm/slab.h b/mm/slab.h
index 71c7261bf822..f6ef862b60ef 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -290,14 +290,14 @@ static inline void *nearest_obj(struct kmem_cache *cache,
/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
- void *addr, void *obj)
+ void *addr, const void *obj)
{
return reciprocal_divide(kasan_reset_tag(obj) - addr,
cache->reciprocal_size);
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
- const struct slab *slab, void *obj)
+ const struct slab *slab, const void *obj)
{
if (is_kfence_address(obj))
return 0;
diff --git a/mm/slub.c b/mm/slub.c
index 862642c165ed..0c906fefc31b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2041,18 +2041,18 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
-static inline void mark_objexts_empty(struct slabobj_ext *obj_exts)
+static inline void mark_obj_codetag_empty(const void *obj)
{
- struct slab *obj_exts_slab;
+ struct slab *obj_slab;
unsigned long slab_exts;
- obj_exts_slab = virt_to_slab(obj_exts);
- slab_exts = slab_obj_exts(obj_exts_slab);
+ obj_slab = virt_to_slab(obj);
+ slab_exts = slab_obj_exts(obj_slab);
if (slab_exts) {
get_slab_obj_exts(slab_exts);
- unsigned int offs = obj_to_index(obj_exts_slab->slab_cache,
- obj_exts_slab, obj_exts);
- struct slabobj_ext *ext = slab_obj_ext(obj_exts_slab,
+ unsigned int offs = obj_to_index(obj_slab->slab_cache,
+ obj_slab, obj);
+ struct slabobj_ext *ext = slab_obj_ext(obj_slab,
slab_exts, offs);
if (unlikely(is_codetag_empty(&ext->ref))) {
@@ -2090,7 +2090,7 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
-static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {}
+static inline void mark_obj_codetag_empty(const void *obj) {}
static inline bool mark_failed_objexts_alloc(struct slab *slab) { return false; }
static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
struct slabobj_ext *vec, unsigned int objects) {}
@@ -2196,7 +2196,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
retry:
old_exts = READ_ONCE(slab->obj_exts);
handle_failed_objexts_alloc(old_exts, vec, objects);
- slab_set_stride(slab, sizeof(struct slabobj_ext));
if (new_slab) {
/*
@@ -2211,7 +2210,7 @@ retry:
* assign slabobj_exts in parallel. In this case the existing
* objcg vector should be reused.
*/
- mark_objexts_empty(vec);
+ mark_obj_codetag_empty(vec);
if (unlikely(!allow_spin))
kfree_nolock(vec);
else
@@ -2254,7 +2253,7 @@ static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
* NULL, therefore replace NULL with CODETAG_EMPTY to indicate that
* the extension for obj_exts is expected to be NULL.
*/
- mark_objexts_empty(obj_exts);
+ mark_obj_codetag_empty(obj_exts);
if (allow_spin)
kfree(obj_exts);
else
@@ -2272,6 +2271,9 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
void *addr;
unsigned long obj_exts;
+ /* Initialize stride early to avoid memory ordering issues */
+ slab_set_stride(slab, sizeof(struct slabobj_ext));
+
if (!need_slab_obj_exts(s))
return;
@@ -2288,7 +2290,6 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
obj_exts |= MEMCG_DATA_OBJEXTS;
#endif
slab->obj_exts = obj_exts;
- slab_set_stride(slab, sizeof(struct slabobj_ext));
} else if (s->flags & SLAB_OBJ_EXT_IN_OBJ) {
unsigned int offset = obj_exts_offset_in_object(s);
@@ -2312,6 +2313,10 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
#else /* CONFIG_SLAB_OBJ_EXT */
+static inline void mark_obj_codetag_empty(const void *obj)
+{
+}
+
static inline void init_slab_obj_exts(struct slab *slab)
{
}
@@ -2783,6 +2788,15 @@ static inline struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s,
static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf)
{
+ /*
+ * If the sheaf was created with __GFP_NO_OBJ_EXT flag then its
+ * corresponding extension is NULL and alloc_tag_sub() will throw a
+ * warning, therefore replace NULL with CODETAG_EMPTY to indicate
+ * that the extension for this sheaf is expected to be NULL.
+ */
+ if (s->flags & SLAB_KMALLOC)
+ mark_obj_codetag_empty(sheaf);
+
kfree(sheaf);
stat(s, SHEAF_FREE);
@@ -2822,7 +2836,7 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp)
if (!sheaf)
return NULL;
- if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC)) {
+ if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) {
free_empty_sheaf(s, sheaf);
return NULL;
}
@@ -4575,7 +4589,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
return NULL;
if (empty) {
- if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC)) {
+ if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) {
full = empty;
} else {
/*
@@ -4890,9 +4904,14 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
static int __prefill_sheaf_pfmemalloc(struct kmem_cache *s,
struct slab_sheaf *sheaf, gfp_t gfp)
{
- int ret = 0;
+ gfp_t gfp_nomemalloc;
+ int ret;
+
+ gfp_nomemalloc = gfp | __GFP_NOMEMALLOC;
+ if (gfp_pfmemalloc_allowed(gfp))
+ gfp_nomemalloc |= __GFP_NOWARN;
- ret = refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC);
+ ret = refill_sheaf(s, sheaf, gfp_nomemalloc);
if (likely(!ret || !gfp_pfmemalloc_allowed(gfp)))
return ret;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index a107375c0629..fb93c6e1c329 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1260,24 +1260,28 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry)
struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
struct net_device *dev = (struct net_device *)vcc->proto_data;
- vcc->pop = vpriv->old_pop;
- if (vpriv->xoff)
- netif_wake_queue(dev);
- kfree(vpriv);
- vcc->user_back = NULL;
- vcc->push = entry->old_push;
- vcc_release_async(vcc, -EPIPE);
+ if (vpriv) {
+ vcc->pop = vpriv->old_pop;
+ if (vpriv->xoff)
+ netif_wake_queue(dev);
+ kfree(vpriv);
+ vcc->user_back = NULL;
+ vcc->push = entry->old_push;
+ vcc_release_async(vcc, -EPIPE);
+ }
entry->vcc = NULL;
}
if (entry->recv_vcc) {
struct atm_vcc *vcc = entry->recv_vcc;
struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
- kfree(vpriv);
- vcc->user_back = NULL;
+ if (vpriv) {
+ kfree(vpriv);
+ vcc->user_back = NULL;
- entry->recv_vcc->push = entry->old_recv_push;
- vcc_release_async(entry->recv_vcc, -EPIPE);
+ entry->recv_vcc->push = entry->old_recv_push;
+ vcc_release_async(entry->recv_vcc, -EPIPE);
+ }
entry->recv_vcc = NULL;
}
}
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 2ce4e5bf9292..fdc2abe96d77 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -111,7 +111,15 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
/* unsupported WiFi driver version */
goto default_throughput;
- real_netdev = batadv_get_real_netdev(hard_iface->net_dev);
+ /* only use rtnl_trylock because the elp worker will be cancelled while
+ * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise
+ * wait forever when the elp work_item was started and it is then also
+ * trying to rtnl_lock
+ */
+ if (!rtnl_trylock())
+ return false;
+ real_netdev = __batadv_get_real_netdev(hard_iface->net_dev);
+ rtnl_unlock();
if (!real_netdev)
goto default_throughput;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 7b7640f3ffe2..d6732c34aeaf 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -204,7 +204,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
}
/**
- * batadv_get_real_netdevice() - check if the given netdev struct is a virtual
+ * __batadv_get_real_netdev() - check if the given netdev struct is a virtual
* interface on top of another 'real' interface
* @netdev: the device to check
*
@@ -214,7 +214,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
* Return: the 'real' net device or the original net device and NULL in case
* of an error.
*/
-static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
+struct net_device *__batadv_get_real_netdev(struct net_device *netdev)
{
struct batadv_hard_iface *hard_iface = NULL;
struct net_device *real_netdev = NULL;
@@ -267,7 +267,7 @@ struct net_device *batadv_get_real_netdev(struct net_device *net_device)
struct net_device *real_netdev;
rtnl_lock();
- real_netdev = batadv_get_real_netdevice(net_device);
+ real_netdev = __batadv_get_real_netdev(net_device);
rtnl_unlock();
return real_netdev;
@@ -336,7 +336,7 @@ static u32 batadv_wifi_flags_evaluate(struct net_device *net_device)
if (batadv_is_cfg80211_netdev(net_device))
wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;
- real_netdev = batadv_get_real_netdevice(net_device);
+ real_netdev = __batadv_get_real_netdev(net_device);
if (!real_netdev)
return wifi_flags;
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 9db8a310961e..9ba8fb2bdceb 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -67,6 +67,7 @@ enum batadv_hard_if_bcast {
extern struct notifier_block batadv_hard_if_notifier;
+struct net_device *__batadv_get_real_netdev(struct net_device *net_device);
struct net_device *batadv_get_real_netdev(struct net_device *net_device);
bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface);
bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ee01122f466f..f7502e62dd35 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -74,7 +74,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
- } else if (IS_ENABLED(CONFIG_IPV6) &&
+ } else if (ipv6_mod_enabled() &&
skb->protocol == htons(ETH_P_IPV6) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 1405f1061a54..2cbae0f9ae1f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -170,7 +170,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
(skb->protocol == htons(ETH_P_ARP) ||
skb->protocol == htons(ETH_P_RARP))) {
br_do_proxy_suppress_arp(skb, br, vid, p);
- } else if (IS_ENABLED(CONFIG_IPV6) &&
+ } else if (ipv6_mod_enabled() &&
skb->protocol == htons(ETH_P_IPV6) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b9b2981c4841..9b55d38ea9ed 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1345,6 +1345,16 @@ br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
}
static inline bool
+br_multicast_port_ctx_options_equal(const struct net_bridge_mcast_port *pmctx1,
+ const struct net_bridge_mcast_port *pmctx2)
+{
+ return br_multicast_ngroups_get(pmctx1) ==
+ br_multicast_ngroups_get(pmctx2) &&
+ br_multicast_ngroups_get_max(pmctx1) ==
+ br_multicast_ngroups_get_max(pmctx2);
+}
+
+static inline bool
br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx)
{
bool vlan_snooping_enabled;
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index 8fa89b04ee94..5514e1fc8d1f 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -43,9 +43,29 @@ bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
u8 range_mc_rtr = br_vlan_multicast_router(range_end);
u8 curr_mc_rtr = br_vlan_multicast_router(v_curr);
- return v_curr->state == range_end->state &&
- __vlan_tun_can_enter_range(v_curr, range_end) &&
- curr_mc_rtr == range_mc_rtr;
+ if (v_curr->state != range_end->state)
+ return false;
+
+ if (!__vlan_tun_can_enter_range(v_curr, range_end))
+ return false;
+
+ if (curr_mc_rtr != range_mc_rtr)
+ return false;
+
+ /* Check user-visible priv_flags that affect output */
+ if ((v_curr->priv_flags ^ range_end->priv_flags) &
+ (BR_VLFLAG_NEIGH_SUPPRESS_ENABLED | BR_VLFLAG_MCAST_ENABLED))
+ return false;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (!br_vlan_is_master(v_curr) &&
+ !br_multicast_port_ctx_vlan_disabled(&v_curr->port_mcast_ctx) &&
+ !br_multicast_port_ctx_options_equal(&v_curr->port_mcast_ctx,
+ &range_end->port_mcast_ctx))
+ return false;
+#endif
+
+ return true;
}
bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
diff --git a/net/can/bcm.c b/net/can/bcm.c
index b7324e9c955b..fd9fa072881e 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1176,6 +1176,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!op)
return -ENOMEM;
+ spin_lock_init(&op->bcm_tx_lock);
op->can_id = msg_head->can_id;
op->nframes = msg_head->nframes;
op->cfsiz = CFSIZ(msg_head->flags);
diff --git a/net/core/dev.c b/net/core/dev.c
index b470487788a2..203dc36aaed5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3987,7 +3987,7 @@ static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb,
if (shinfo->nr_frags > 0) {
niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0]));
if (net_is_devmem_iov(niov) &&
- net_devmem_iov_binding(niov)->dev != dev)
+ READ_ONCE(net_devmem_iov_binding(niov)->dev) != dev)
goto out_free;
}
@@ -4818,10 +4818,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
- /* Other cpus might concurrently change txq->xmit_lock_owner
- * to -1 or to their cpu id, but not to our id.
- */
- if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
+ if (!netif_tx_owned(txq, cpu)) {
bool is_list = false;
if (dev_xmit_recursion())
@@ -7807,11 +7804,12 @@ static int napi_thread_wait(struct napi_struct *napi)
return -1;
}
-static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
+static void napi_threaded_poll_loop(struct napi_struct *napi,
+ unsigned long *busy_poll_last_qs)
{
+ unsigned long last_qs = busy_poll_last_qs ? *busy_poll_last_qs : jiffies;
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct softnet_data *sd;
- unsigned long last_qs = jiffies;
for (;;) {
bool repoll = false;
@@ -7840,12 +7838,12 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
/* When busy poll is enabled, the old packets are not flushed in
* napi_complete_done. So flush them here.
*/
- if (busy_poll)
+ if (busy_poll_last_qs)
gro_flush_normal(&napi->gro, HZ >= 1000);
local_bh_enable();
/* Call cond_resched here to avoid watchdog warnings. */
- if (repoll || busy_poll) {
+ if (repoll || busy_poll_last_qs) {
rcu_softirq_qs_periodic(last_qs);
cond_resched();
}
@@ -7853,11 +7851,15 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
if (!repoll)
break;
}
+
+ if (busy_poll_last_qs)
+ *busy_poll_last_qs = last_qs;
}
static int napi_threaded_poll(void *data)
{
struct napi_struct *napi = data;
+ unsigned long last_qs = jiffies;
bool want_busy_poll;
bool in_busy_poll;
unsigned long val;
@@ -7875,7 +7877,7 @@ static int napi_threaded_poll(void *data)
assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state,
want_busy_poll);
- napi_threaded_poll_loop(napi, want_busy_poll);
+ napi_threaded_poll_loop(napi, want_busy_poll ? &last_qs : NULL);
}
return 0;
@@ -13189,7 +13191,7 @@ static void run_backlog_napi(unsigned int cpu)
{
struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
- napi_threaded_poll_loop(&sd->backlog, false);
+ napi_threaded_poll_loop(&sd->backlog, NULL);
}
static void backlog_napi_setup(unsigned int cpu)
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 8c9aad776bf4..69d79aee07ef 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -396,7 +396,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk,
* net_device.
*/
dst_dev = dst_dev_rcu(dst);
- if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) {
+ if (unlikely(!dst_dev) ||
+ unlikely(dst_dev != READ_ONCE(binding->dev))) {
err = -ENODEV;
goto out_unlock;
}
@@ -513,7 +514,8 @@ static void mp_dmabuf_devmem_uninstall(void *mp_priv,
xa_erase(&binding->bound_rxqs, xa_idx);
if (xa_empty(&binding->bound_rxqs)) {
mutex_lock(&binding->lock);
- binding->dev = NULL;
+ ASSERT_EXCLUSIVE_WRITER(binding->dev);
+ WRITE_ONCE(binding->dev, NULL);
mutex_unlock(&binding->lock);
}
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 0d5d5a17acb2..a77d23fe2359 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4150,12 +4150,14 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
struct xdp_rxq_info *rxq = xdp->rxq;
- unsigned int tailroom;
+ int tailroom;
if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
return -EOPNOTSUPP;
- tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+ tailroom = rxq->frag_size - skb_frag_size(frag) -
+ skb_frag_off(frag) % rxq->frag_size;
+ WARN_ON_ONCE(tailroom < 0);
if (unlikely(offset > tailroom))
return -EINVAL;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a8558a52884f..cd74beffd209 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -132,7 +132,7 @@ static int netif_local_xmit_active(struct net_device *dev)
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
- if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id())
+ if (netif_tx_owned(txq, smp_processor_id()))
return 1;
}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 9a3965680451..6a6f2cda5aae 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -20,7 +20,6 @@
#include <net/tcp.h>
static siphash_aligned_key_t net_secret;
-static siphash_aligned_key_t ts_secret;
#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
@@ -28,11 +27,6 @@ static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
}
-
-static __always_inline void ts_secret_init(void)
-{
- net_get_random_once(&ts_secret, sizeof(ts_secret));
-}
#endif
#ifdef CONFIG_INET
@@ -53,28 +47,9 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
-u32 secure_tcpv6_ts_off(const struct net *net,
- const __be32 *saddr, const __be32 *daddr)
-{
- const struct {
- struct in6_addr saddr;
- struct in6_addr daddr;
- } __aligned(SIPHASH_ALIGNMENT) combined = {
- .saddr = *(struct in6_addr *)saddr,
- .daddr = *(struct in6_addr *)daddr,
- };
-
- if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
- return 0;
-
- ts_secret_init();
- return siphash(&combined, offsetofend(typeof(combined), daddr),
- &ts_secret);
-}
-EXPORT_IPV6_MOD(secure_tcpv6_ts_off);
-
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport)
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+ const __be32 *daddr, __be16 sport, __be16 dport)
{
const struct {
struct in6_addr saddr;
@@ -87,14 +62,20 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
.sport = sport,
.dport = dport
};
- u32 hash;
+ union tcp_seq_and_ts_off st;
net_secret_init();
- hash = siphash(&combined, offsetofend(typeof(combined), dport),
- &net_secret);
- return seq_scale(hash);
+
+ st.hash64 = siphash(&combined, offsetofend(typeof(combined), dport),
+ &net_secret);
+
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+ st.ts_off = 0;
+
+ st.seq = seq_scale(st.seq);
+ return st;
}
-EXPORT_SYMBOL(secure_tcpv6_seq);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_ts_off);
u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
@@ -118,33 +99,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
-{
- if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
- return 0;
-
- ts_secret_init();
- return siphash_2u32((__force u32)saddr, (__force u32)daddr,
- &ts_secret);
-}
-
/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
* it would be easy enough to have the former function use siphash_4u32, passing
* the arguments as separate u32.
*/
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport)
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport)
{
- u32 hash;
+ u32 ports = (__force u32)sport << 16 | (__force u32)dport;
+ union tcp_seq_and_ts_off st;
net_secret_init();
- hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u32)sport << 16 | (__force u32)dport,
- &net_secret);
- return seq_scale(hash);
+
+ st.hash64 = siphash_3u32((__force u32)saddr, (__force u32)daddr,
+ ports, &net_secret);
+
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+ st.ts_off = 0;
+
+ st.seq = seq_scale(st.seq);
+ return st;
}
-EXPORT_SYMBOL_GPL(secure_tcp_seq);
+EXPORT_SYMBOL_GPL(secure_tcp_seq_and_ts_off);
u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2e26174c9919..3261793abe83 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1205,8 +1205,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
return;
psock->saved_data_ready = sk->sk_data_ready;
- sk->sk_data_ready = sk_psock_strp_data_ready;
- sk->sk_write_space = sk_psock_write_space;
+ WRITE_ONCE(sk->sk_data_ready, sk_psock_strp_data_ready);
+ WRITE_ONCE(sk->sk_write_space, sk_psock_write_space);
}
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
@@ -1216,8 +1216,8 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
if (!psock->saved_data_ready)
return;
- sk->sk_data_ready = psock->saved_data_ready;
- psock->saved_data_ready = NULL;
+ WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready);
+ WRITE_ONCE(psock->saved_data_ready, NULL);
strp_stop(&psock->strp);
}
@@ -1296,8 +1296,8 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
return;
psock->saved_data_ready = sk->sk_data_ready;
- sk->sk_data_ready = sk_psock_verdict_data_ready;
- sk->sk_write_space = sk_psock_write_space;
+ WRITE_ONCE(sk->sk_data_ready, sk_psock_verdict_data_ready);
+ WRITE_ONCE(sk->sk_write_space, sk_psock_write_space);
}
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
@@ -1308,6 +1308,6 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
if (!psock->saved_data_ready)
return;
- sk->sk_data_ready = psock->saved_data_ready;
+ WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready);
psock->saved_data_ready = NULL;
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b71c22475c51..df922f9f5289 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -748,6 +748,7 @@ config TCP_SIGPOOL
config TCP_AO
bool "TCP: Authentication Option (RFC5925)"
select CRYPTO
+ select CRYPTO_LIB_UTILS
select TCP_SIGPOOL
depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64)
help
@@ -761,6 +762,7 @@ config TCP_AO
config TCP_MD5SIG
bool "TCP: MD5 Signature Option support (RFC2385)"
select CRYPTO_LIB_MD5
+ select CRYPTO_LIB_UTILS
help
RFC2385 specifies a method of giving MD5 protection to TCP sessions.
Its main (only?) use is to protect BGP sessions between core routers
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 52847950b28a..61e654b395be 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -200,7 +200,7 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
struct inet_bind2_bucket *tb2, unsigned short port)
{
- inet_sk(sk)->inet_num = port;
+ WRITE_ONCE(inet_sk(sk)->inet_num, port);
inet_csk(sk)->icsk_bind_hash = tb;
inet_csk(sk)->icsk_bind2_hash = tb2;
sk_add_bind_node(sk, &tb2->owners);
@@ -224,7 +224,7 @@ static void __inet_put_port(struct sock *sk)
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
inet_csk(sk)->icsk_bind_hash = NULL;
- inet_sk(sk)->inet_num = 0;
+ WRITE_ONCE(inet_sk(sk)->inet_num, 0);
sk->sk_userlocks &= ~SOCK_CONNECT_BIND;
spin_lock(&head2->lock);
@@ -352,7 +352,7 @@ static inline int compute_score(struct sock *sk, const struct net *net,
{
int score = -1;
- if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
+ if (net_eq(sock_net(sk), net) && READ_ONCE(sk->sk_num) == hnum &&
!ipv6_only_sock(sk)) {
if (sk->sk_rcv_saddr != daddr)
return -1;
@@ -1206,7 +1206,7 @@ error:
sk->sk_hash = 0;
inet_sk(sk)->inet_sport = 0;
- inet_sk(sk)->inet_num = 0;
+ WRITE_ONCE(inet_sk(sk)->inet_num, 0);
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 061751aabc8e..fc3affd9c801 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -378,9 +378,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcp_ts_off(net,
- ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr);
+ union tcp_seq_and_ts_off st;
+
+ st = secure_tcp_seq_and_ts_off(net,
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
+ tsoff = st.ts_off;
tcp_opt.rcv_tsecr -= tsoff;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 643763bc2142..5654cc9c8a0b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -486,7 +486,8 @@ static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
proc_fib_multipath_hash_rand_seed),
};
- WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed, new.user_seed);
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed, new.mp_seed);
}
static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write,
@@ -500,7 +501,7 @@ static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write
int ret;
mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
- user_seed = mphs->user_seed;
+ user_seed = READ_ONCE(mphs->user_seed);
tmp = *table;
tmp.data = &user_seed;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1c8be22a361e..ed6f6712f060 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -244,6 +244,7 @@
#define pr_fmt(fmt) "TCP: " fmt
#include <crypto/md5.h>
+#include <crypto/utils.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -1446,7 +1447,7 @@ out_err:
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
- sk->sk_write_space(sk);
+ READ_ONCE(sk->sk_write_space)(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
if (binding)
@@ -4163,7 +4164,7 @@ ao_parse:
break;
case TCP_NOTSENT_LOWAT:
WRITE_ONCE(tp->notsent_lowat, val);
- sk->sk_write_space(sk);
+ READ_ONCE(sk->sk_write_space)(sk);
break;
case TCP_INQ:
if (val > 1 || val < 0)
@@ -4952,7 +4953,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
else
tp->af_specific->calc_md5_hash(newhash, key, NULL, skb);
- if (memcmp(hash_location, newhash, 16) != 0) {
+ if (crypto_memneq(hash_location, newhash, 16)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
trace_tcp_hash_md5_mismatch(sk, skb);
return SKB_DROP_REASON_TCP_MD5FAILURE;
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 4980caddb0fc..a97cdf3e6af4 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "TCP: " fmt
#include <crypto/hash.h>
+#include <crypto/utils.h>
#include <linux/inetdevice.h>
#include <linux/tcp.h>
@@ -922,7 +923,7 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb,
/* XXX: make it per-AF callback? */
tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key,
(phash - (u8 *)th), sne);
- if (memcmp(phash, hash_buf, maclen)) {
+ if (crypto_memneq(phash, hash_buf, maclen)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
atomic64_inc(&info->counters.pkt_bad);
atomic64_inc(&key->pkt_bad);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index a0215a2da324..cc0bd73f36b6 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -724,7 +724,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash);
tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
} else {
- sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_write_space, psock->saved_write_space);
/* Pairs with lockless read in sk_clone_lock() */
sock_replace_proto(sk, psock->sk_proto);
}
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index d83efd91f461..7935702e394b 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -509,7 +509,7 @@ next_chunk:
if (r->sdiag_family != AF_UNSPEC &&
sk->sk_family != r->sdiag_family)
goto next_normal;
- if (r->id.idiag_sport != htons(sk->sk_num) &&
+ if (r->id.idiag_sport != htons(READ_ONCE(sk->sk_num)) &&
r->id.idiag_sport)
goto next_normal;
if (r->id.idiag_dport != sk->sk_dport &&
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c27d11c3470b..71ac69b7b75e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5320,25 +5320,11 @@ static void tcp_ofo_queue(struct sock *sk)
static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
-/* Check if this incoming skb can be added to socket receive queues
- * while satisfying sk->sk_rcvbuf limit.
- *
- * In theory we should use skb->truesize, but this can cause problems
- * when applications use too small SO_RCVBUF values.
- * When LRO / hw gro is used, the socket might have a high tp->scaling_ratio,
- * allowing RWIN to be close to available space.
- * Whenever the receive queue gets full, we can receive a small packet
- * filling RWIN, but with a high skb->truesize, because most NIC use 4K page
- * plus sk_buff metadata even when receiving less than 1500 bytes of payload.
- *
- * Note that we use skb->len to decide to accept or drop this packet,
- * but sk->sk_rmem_alloc is the sum of all skb->truesize.
- */
static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb)
{
unsigned int rmem = atomic_read(&sk->sk_rmem_alloc);
- return rmem + skb->len <= sk->sk_rcvbuf;
+ return rmem <= sk->sk_rcvbuf;
}
static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb,
@@ -5371,7 +5357,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
return;
}
@@ -5581,7 +5567,7 @@ err:
void tcp_data_ready(struct sock *sk)
{
if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE))
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -5637,7 +5623,7 @@ queue_and_out:
inet_csk(sk)->icsk_ack.pending |=
(ICSK_ACK_NOMEM | ICSK_ACK_NOW);
inet_csk_schedule_ack(sk);
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
if (skb_queue_len(&sk->sk_receive_queue) && skb->len) {
reason = SKB_DROP_REASON_PROTO_MEM;
@@ -6060,7 +6046,9 @@ static void tcp_new_space(struct sock *sk)
tp->snd_cwnd_stamp = tcp_jiffies32;
}
- INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
+ INDIRECT_CALL_1(READ_ONCE(sk->sk_write_space),
+ sk_stream_write_space,
+ sk);
}
/* Caller made space either from:
@@ -6271,7 +6259,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
BUG();
WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
}
}
}
@@ -7604,6 +7592,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
+ union tcp_seq_and_ts_off st;
struct request_sock *req;
bool want_cookie = false;
struct dst_entry *dst;
@@ -7673,9 +7662,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (!dst)
goto drop_and_free;
+ if (tmp_opt.tstamp_ok || (!want_cookie && !isn))
+ st = af_ops->init_seq_and_ts_off(net, skb);
+
if (tmp_opt.tstamp_ok) {
tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
- tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
+ tcp_rsk(req)->ts_off = st.ts_off;
}
if (!want_cookie && !isn) {
int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
@@ -7697,7 +7689,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_release;
}
- isn = af_ops->init_seq(skb);
+ isn = st.seq;
}
tcp_ecn_create_request(req, skb, sk, dst);
@@ -7738,7 +7730,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
sock_put(fastopen_sk);
goto drop_and_free;
}
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
bh_unlock_sock(fastopen_sk);
sock_put(fastopen_sk);
} else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9dc75ba73eff..190e8a238876 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,6 +88,7 @@
#include <linux/skbuff_ref.h>
#include <crypto/md5.h>
+#include <crypto/utils.h>
#include <trace/events/tcp.h>
@@ -104,17 +105,14 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = {
static DEFINE_MUTEX(tcp_exit_batch_mutex);
-static u32 tcp_v4_init_seq(const struct sk_buff *skb)
+static union tcp_seq_and_ts_off
+tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
{
- return secure_tcp_seq(ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source);
-}
-
-static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
-{
- return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+ return secure_tcp_seq_and_ts_off(net,
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -326,15 +324,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len
rt = NULL;
if (likely(!tp->repair)) {
+ union tcp_seq_and_ts_off st;
+
+ st = secure_tcp_seq_and_ts_off(net,
+ inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port);
if (!tp->write_seq)
- WRITE_ONCE(tp->write_seq,
- secure_tcp_seq(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port));
- WRITE_ONCE(tp->tsoffset,
- secure_tcp_ts_off(net, inet->inet_saddr,
- inet->inet_daddr));
+ WRITE_ONCE(tp->write_seq, st.seq);
+ WRITE_ONCE(tp->tsoffset, st.ts_off);
}
atomic_set(&inet->inet_id, get_random_u16());
@@ -821,7 +820,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
goto out;
tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
- if (memcmp(md5_hash_location, newhash, 16) != 0)
+ if (crypto_memneq(md5_hash_location, newhash, 16))
goto out;
}
@@ -1658,8 +1657,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.cookie_init_seq = cookie_v4_init_sequence,
#endif
.route_req = tcp_v4_route_req,
- .init_seq = tcp_v4_init_seq,
- .init_ts_off = tcp_v4_init_ts_off,
+ .init_seq_and_ts_off = tcp_v4_init_seq_and_ts_off,
.send_synack = tcp_v4_send_synack,
};
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d9c5a43bd281..dafb63b923d0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -1004,7 +1004,7 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
reason = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
- parent->sk_data_ready(parent);
+ READ_ONCE(parent->sk_data_ready)(parent);
} else {
/* Alas, it is possible again, because we do lookup
* in main socket hash table and lock on listening
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 27384024ebc0..668a0284c3d5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1787,7 +1787,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
* using prepare_to_wait_exclusive().
*/
while (nb) {
- INDIRECT_CALL_1(sk->sk_data_ready,
+ INDIRECT_CALL_1(READ_ONCE(sk->sk_data_ready),
sock_def_readable, sk);
nb--;
}
@@ -2286,7 +2286,6 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
udp_sk(sk)->udp_port_hash);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
nhslot2 = udp_hashslot2(udptable, newhash);
- udp_sk(sk)->udp_portaddr_hash = newhash;
if (hslot2 != nhslot2 ||
rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -2320,19 +2319,25 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
if (udp_hashed4(sk)) {
spin_lock_bh(&hslot->lock);
- udp_rehash4(udptable, sk, newhash4);
- if (hslot2 != nhslot2) {
- spin_lock(&hslot2->lock);
- udp_hash4_dec(hslot2);
- spin_unlock(&hslot2->lock);
-
- spin_lock(&nhslot2->lock);
- udp_hash4_inc(nhslot2);
- spin_unlock(&nhslot2->lock);
+ if (inet_rcv_saddr_any(sk)) {
+ udp_unhash4(udptable, sk);
+ } else {
+ udp_rehash4(udptable, sk, newhash4);
+ if (hslot2 != nhslot2) {
+ spin_lock(&hslot2->lock);
+ udp_hash4_dec(hslot2);
+ spin_unlock(&hslot2->lock);
+
+ spin_lock(&nhslot2->lock);
+ udp_hash4_inc(nhslot2);
+ spin_unlock(&nhslot2->lock);
+ }
}
spin_unlock_bh(&hslot->lock);
}
+
+ udp_sk(sk)->udp_portaddr_hash = newhash;
}
}
EXPORT_IPV6_MOD(udp_lib_rehash);
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 912f0bfef4af..d328a3078ae0 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -158,7 +158,7 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
if (restore) {
- sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_write_space, psock->saved_write_space);
sock_replace_proto(sk, psock->sk_proto);
return 0;
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 5e1da088d8e1..182d38e6d6d8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -95,7 +95,8 @@ static inline int compute_score(struct sock *sk, const struct net *net,
{
int score = -1;
- if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
+ if (net_eq(sock_net(sk), net) &&
+ READ_ONCE(inet_sk(sk)->inet_num) == hnum &&
sk->sk_family == PF_INET6) {
if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 85df25c36409..08cd86f49bf9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1063,7 +1063,8 @@ static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
*/
if (netif_is_l3_slave(dev) &&
!rt6_need_strict(&res->f6i->fib6_dst.addr))
- dev = l3mdev_master_dev_rcu(dev);
+ dev = l3mdev_master_dev_rcu(dev) ? :
+ dev_net(dev)->loopback_dev;
else if (!netif_is_l3_master(dev))
dev = dev_net(dev)->loopback_dev;
/* last case is netif_is_l3_master(dev) is true in which
@@ -3582,7 +3583,6 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
- int addr_type;
int err;
fib6_nh->fib_nh_family = AF_INET6;
@@ -3624,11 +3624,10 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
fib6_nh->fib_nh_weight = 1;
- /* We cannot add true routes via loopback here,
- * they would result in kernel looping; promote them to reject routes
+ /* Reset the nexthop device to the loopback device in case of reject
+ * routes.
*/
- addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
+ if (cfg->fc_flags & RTF_REJECT) {
/* hold loopback dev/idev if we haven't done so. */
if (dev != net->loopback_dev) {
if (dev) {
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7e007f013ec8..4f6f0d751d6c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -151,9 +151,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcpv6_ts_off(net,
- ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32);
+ union tcp_seq_and_ts_off st;
+
+ st = secure_tcpv6_seq_and_ts_off(net,
+ ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
+ tsoff = st.ts_off;
tcp_opt.rcv_tsecr -= tsoff;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c2816048007d..a62dd2999aec 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -68,6 +68,7 @@
#include <linux/seq_file.h>
#include <crypto/md5.h>
+#include <crypto/utils.h>
#include <trace/events/tcp.h>
@@ -104,18 +105,14 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
}
}
-static u32 tcp_v6_init_seq(const struct sk_buff *skb)
+static union tcp_seq_and_ts_off
+tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
{
- return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source);
-}
-
-static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
-{
- return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32);
+ return secure_tcpv6_seq_and_ts_off(net,
+ ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
}
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
@@ -319,14 +316,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
sk_set_txhash(sk);
if (likely(!tp->repair)) {
+ union tcp_seq_and_ts_off st;
+
+ st = secure_tcpv6_seq_and_ts_off(net,
+ np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport);
if (!tp->write_seq)
- WRITE_ONCE(tp->write_seq,
- secure_tcpv6_seq(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport));
- tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32);
+ WRITE_ONCE(tp->write_seq, st.seq);
+ tp->tsoffset = st.ts_off;
}
if (tcp_fastopen_defer_connect(sk, &err))
@@ -831,8 +830,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.cookie_init_seq = cookie_v6_init_sequence,
#endif
.route_req = tcp_v6_route_req,
- .init_seq = tcp_v6_init_seq,
- .init_ts_off = tcp_v6_init_ts_off,
+ .init_seq_and_ts_off = tcp_v6_init_seq_and_ts_off,
.send_synack = tcp_v6_send_synack,
};
@@ -1063,7 +1061,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
key.type = TCP_KEY_MD5;
tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
- if (memcmp(md5_hash_location, newhash, 16) != 0)
+ if (crypto_memneq(md5_hash_location, newhash, 16))
goto out;
}
#endif
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c
index 75096b2195d2..078e1e23d8d1 100644
--- a/net/mac80211/eht.c
+++ b/net/mac80211/eht.c
@@ -154,6 +154,7 @@ void ieee80211_rx_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata,
u8 *ptr = mgmt->u.action.u.eml_omn.variable;
struct ieee80211_eml_params eml_params = {
.link_id = status->link_id,
+ .control = control,
};
struct sta_info *sta;
int opt_len = 0;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 7298836469b3..57a456690406 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
}
-void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
+static bool subflow_in_rm_list(const struct mptcp_subflow_context *subflow,
+ const struct mptcp_rm_list *rm_list)
+{
+ u8 i, id = subflow_get_local_id(subflow);
+
+ for (i = 0; i < rm_list->nr; i++) {
+ if (rm_list->ids[i] == id)
+ return true;
+ }
+
+ return false;
+}
+
+static void
+mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list)
{
- struct mptcp_subflow_context *subflow, *alt = NULL;
+ struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
@@ -224,19 +239,35 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
return;
mptcp_for_each_subflow(msk, subflow) {
- if (__mptcp_subflow_active(subflow)) {
- if (!subflow->stale) {
- mptcp_pm_send_ack(msk, subflow, false, false);
- return;
- }
+ if (!__mptcp_subflow_active(subflow))
+ continue;
- if (!alt)
- alt = subflow;
+ if (unlikely(subflow->stale)) {
+ if (!stale)
+ stale = subflow;
+ } else if (unlikely(rm_list &&
+ subflow_in_rm_list(subflow, rm_list))) {
+ if (!same_id)
+ same_id = subflow;
+ } else {
+ goto send_ack;
}
}
- if (alt)
- mptcp_pm_send_ack(msk, alt, false, false);
+ if (same_id)
+ subflow = same_id;
+ else if (stale)
+ subflow = stale;
+ else
+ return;
+
+send_ack:
+ mptcp_pm_send_ack(msk, subflow, false, false);
+}
+
+void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
+{
+ mptcp_pm_addr_send_ack_avoid_list(msk, NULL);
}
int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -470,7 +501,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
msk->pm.rm_list_tx = *rm_list;
rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
WRITE_ONCE(msk->pm.addr_signal, rm_addr);
- mptcp_pm_addr_send_ack(msk);
+ mptcp_pm_addr_send_ack_avoid_list(msk, rm_list);
return 0;
}
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index b5316a6c7d1b..b2b9df43960e 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -418,6 +418,15 @@ subflow:
}
exit:
+ /* If an endpoint has both the signal and subflow flags, but it is not
+ * possible to create subflows -- the 'while' loop body above never
+ * executed -- then still mark the endp as used, which is somehow the
+ * case. This avoids issues later when removing the endpoint and calling
+ * __mark_subflow_endp_available(), which expects the increment here.
+ */
+ if (signal_and_subflow && local.addr.id != msk->mpc_endpoint_id)
+ msk->pm.local_addr_used++;
+
mptcp_pm_nl_check_work_pending(msk);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ae97e4d2c1af..e72a9da85e29 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -833,6 +833,11 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
}
}
+/* Use NFT_ITER_UPDATE iterator even if this may be called from the preparation
+ * phase, the set clone might already exist from a previous command, or it might
+ * be a set that is going away and does not require a clone. The netns and
+ * netlink release paths also need to work on the live set.
+ */
static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
@@ -7110,6 +7115,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
+ bool set_full = false;
u64 expiration;
u64 timeout;
int err, i;
@@ -7401,10 +7407,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_elem_free;
+ if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+ unsigned int max = nft_set_maxsize(set), nelems;
+
+ nelems = atomic_inc_return(&set->nelems);
+ if (nelems > max)
+ set_full = true;
+ }
+
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
- goto err_elem_free;
+ goto err_set_size;
}
ext->genmask = nft_genmask_cur(ctx->net);
@@ -7456,7 +7470,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
ue->priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans);
- goto err_elem_free;
+ goto err_set_size;
}
}
}
@@ -7474,23 +7488,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_element_clash;
}
- if (!(flags & NFT_SET_ELEM_CATCHALL)) {
- unsigned int max = nft_set_maxsize(set);
-
- if (!atomic_add_unless(&set->nelems, 1, max)) {
- err = -ENFILE;
- goto err_set_full;
- }
- }
-
nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
nft_trans_commit_list_add_elem(ctx->net, trans);
- return 0;
-err_set_full:
- nft_setelem_remove(ctx->net, set, elem.priv);
+ return set_full ? -ENFILE : 0;
+
err_element_clash:
kfree(trans);
+err_set_size:
+ if (!(flags & NFT_SET_ELEM_CATCHALL))
+ atomic_dec(&set->nelems);
err_elem_free:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
err_parse_data:
@@ -7841,9 +7848,12 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
{
+ /* The set backend might need to clone the set, do it now from the
+ * preparation phase, use NFT_ITER_UPDATE_CLONE iterator type.
+ */
struct nft_set_iter iter = {
.genmask = genmask,
- .type = NFT_ITER_UPDATE,
+ .type = NFT_ITER_UPDATE_CLONE,
.fn = nft_setelem_flush,
};
@@ -10407,11 +10417,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
schedule_work(&trans_gc_work);
}
-static int nft_trans_gc_space(struct nft_trans_gc *trans)
-{
- return NFT_TRANS_GC_BATCHCOUNT - trans->count;
-}
-
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
unsigned int gc_seq, gfp_t gfp)
{
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 739b992bde59..b0e571c8e3f3 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -374,6 +374,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
{
switch (iter->type) {
case NFT_ITER_UPDATE:
+ case NFT_ITER_UPDATE_CLONE:
/* only relevant for netlink dumps which use READ type */
WARN_ON_ONCE(iter->skip != 0);
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 7ef4b44471d3..a34632ae6048 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1680,11 +1680,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
}
/**
- * pipapo_gc() - Drop expired entries from set, destroy start and end elements
+ * pipapo_gc_scan() - Drop expired entries from set and link them to gc list
* @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
@@ -1697,6 +1697,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
if (!gc)
return;
+ list_add(&gc->list, &priv->gc_head);
+
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const struct nft_pipapo_field *f;
@@ -1724,9 +1726,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
* NFT_SET_ELEM_DEAD_BIT.
*/
if (__nft_set_elem_expired(&e->ext, tstamp)) {
- gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
- if (!gc)
- return;
+ if (!nft_trans_gc_space(gc)) {
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
+
+ list_add(&gc->list, &priv->gc_head);
+ }
nft_pipapo_gc_deactivate(net, set, e);
pipapo_drop(m, rulemap);
@@ -1740,10 +1746,30 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
}
}
- gc = nft_trans_gc_catchall_sync(gc);
+ priv->last_gc = jiffies;
+}
+
+/**
+ * pipapo_gc_queue() - Free expired elements
+ * @set: nftables API set representation
+ */
+static void pipapo_gc_queue(struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_trans_gc *gc, *next;
+
+ /* always do a catchall cycle: */
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (gc) {
+ gc = nft_trans_gc_catchall_sync(gc);
+ if (gc)
+ nft_trans_gc_queue_sync_done(gc);
+ }
+
+ /* always purge queued gc elements. */
+ list_for_each_entry_safe(gc, next, &priv->gc_head, list) {
+ list_del(&gc->list);
nft_trans_gc_queue_sync_done(gc);
- priv->last_gc = jiffies;
}
}
@@ -1797,6 +1823,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
*
* We also need to create a new working copy for subsequent insertions and
* deletions.
+ *
+ * After the live copy has been replaced by the clone, we can safely queue
+ * expired elements that have been collected by pipapo_gc_scan() for
+ * memory reclaim.
*/
static void nft_pipapo_commit(struct nft_set *set)
{
@@ -1807,7 +1837,7 @@ static void nft_pipapo_commit(struct nft_set *set)
return;
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- pipapo_gc(set, priv->clone);
+ pipapo_gc_scan(set, priv->clone);
old = rcu_replace_pointer(priv->match, priv->clone,
nft_pipapo_transaction_mutex_held(set));
@@ -1815,6 +1845,8 @@ static void nft_pipapo_commit(struct nft_set *set)
if (old)
call_rcu(&old->rcu, pipapo_reclaim_match);
+
+ pipapo_gc_queue(set);
}
static void nft_pipapo_abort(const struct nft_set *set)
@@ -2144,13 +2176,20 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_pipapo_match *m;
switch (iter->type) {
- case NFT_ITER_UPDATE:
+ case NFT_ITER_UPDATE_CLONE:
m = pipapo_maybe_clone(set);
if (!m) {
iter->err = -ENOMEM;
return;
}
-
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ break;
+ case NFT_ITER_UPDATE:
+ if (priv->clone)
+ m = priv->clone;
+ else
+ m = rcu_dereference_protected(priv->match,
+ nft_pipapo_transaction_mutex_held(set));
nft_pipapo_do_walk(ctx, set, m, iter);
break;
case NFT_ITER_READ:
@@ -2272,6 +2311,7 @@ static int nft_pipapo_init(const struct nft_set *set,
f->mt = NULL;
}
+ INIT_LIST_HEAD(&priv->gc_head);
rcu_assign_pointer(priv->match, m);
return 0;
@@ -2321,6 +2361,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
+ WARN_ON_ONCE(!list_empty(&priv->gc_head));
+
m = rcu_dereference_protected(priv->match, true);
if (priv->clone) {
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index eaab422aa56a..9aee9a9eaeb7 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -156,12 +156,14 @@ struct nft_pipapo_match {
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
* @last_gc: Timestamp of last garbage collection run, jiffies
+ * @gc_head: list of nft_trans_gc to queue up for mem reclaim
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
unsigned long last_gc;
+ struct list_head gc_head;
};
struct nft_pipapo_elem;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index e90c5b8b4d5e..e42905376654 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -856,13 +856,15 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_rbtree *priv = nft_set_priv(set);
switch (iter->type) {
- case NFT_ITER_UPDATE:
- lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
-
+ case NFT_ITER_UPDATE_CLONE:
if (nft_array_may_resize(set) < 0) {
iter->err = -ENOMEM;
break;
}
+ fallthrough;
+ case NFT_ITER_UPDATE:
+ lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
+
nft_rbtree_do_walk(ctx, set, iter);
break;
case NFT_ITER_READ:
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 3670bb33732e..7cb1e6aaae90 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -707,8 +707,10 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
int rc;
data_exch = kzalloc_obj(*data_exch);
- if (!data_exch)
+ if (!data_exch) {
+ kfree_skb(skb);
return -ENOMEM;
+ }
data_exch->cb = cb;
data_exch->cb_context = cb_context;
@@ -731,8 +733,10 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
data_exch);
exit:
- if (rc)
+ if (rc) {
+ kfree_skb(skb);
kfree(data_exch);
+ }
return rc;
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 6e9b76e2cc56..43d871525dbc 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -567,6 +567,10 @@ static int nci_close_device(struct nci_dev *ndev)
flush_workqueue(ndev->cmd_wq);
timer_delete_sync(&ndev->cmd_timer);
timer_delete_sync(&ndev->data_timer);
+ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+ nci_data_exchange_complete(ndev, NULL,
+ ndev->cur_conn_id,
+ -ENODEV);
mutex_unlock(&ndev->req_lock);
return 0;
}
@@ -598,6 +602,11 @@ static int nci_close_device(struct nci_dev *ndev)
flush_workqueue(ndev->cmd_wq);
timer_delete_sync(&ndev->cmd_timer);
+ timer_delete_sync(&ndev->data_timer);
+
+ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+ nci_data_exchange_complete(ndev, NULL, ndev->cur_conn_id,
+ -ENODEV);
/* Clear flags except NCI_UNREG */
ndev->flags &= BIT(NCI_UNREG);
@@ -1035,18 +1044,23 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
struct nci_conn_info *conn_info;
conn_info = ndev->rf_conn_info;
- if (!conn_info)
+ if (!conn_info) {
+ kfree_skb(skb);
return -EPROTO;
+ }
pr_debug("target_idx %d, len %d\n", target->idx, skb->len);
if (!ndev->target_active_prot) {
pr_err("unable to exchange data, no active target\n");
+ kfree_skb(skb);
return -EINVAL;
}
- if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+ if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) {
+ kfree_skb(skb);
return -EBUSY;
+ }
/* store cb and context to be used on receiving data */
conn_info->data_exchange_cb = cb;
@@ -1482,10 +1496,20 @@ static bool nci_valid_size(struct sk_buff *skb)
unsigned int hdr_size = NCI_CTRL_HDR_SIZE;
if (skb->len < hdr_size ||
- !nci_plen(skb->data) ||
skb->len < hdr_size + nci_plen(skb->data)) {
return false;
}
+
+ if (!nci_plen(skb->data)) {
+ /* Allow zero length in proprietary notifications (0x20 - 0x3F). */
+ if (nci_opcode_oid(nci_opcode(skb->data)) >= 0x20 &&
+ nci_mt(skb->data) == NCI_MT_NTF_PKT)
+ return true;
+
+ /* Disallow zero length otherwise. */
+ return false;
+ }
+
return true;
}
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 78f4131af3cf..5f98c73db5af 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -33,7 +33,8 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
if (!conn_info) {
kfree_skb(skb);
- goto exit;
+ clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
+ return;
}
cb = conn_info->data_exchange_cb;
@@ -45,6 +46,12 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
timer_delete_sync(&ndev->data_timer);
clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags);
+ /* Mark the exchange as done before calling the callback.
+ * The callback (e.g. rawsock_data_exchange_complete) may
+ * want to immediately queue another data exchange.
+ */
+ clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
+
if (cb) {
/* forward skb to nfc core */
cb(cb_context, skb, err);
@@ -54,9 +61,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
/* no waiting callback, free skb */
kfree_skb(skb);
}
-
-exit:
- clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
}
/* ----------------- NCI TX Data ----------------- */
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index b049022399ae..f7d7a599fade 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -67,6 +67,17 @@ static int rawsock_release(struct socket *sock)
if (sock->type == SOCK_RAW)
nfc_sock_unlink(&raw_sk_list, sk);
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* Prevent rawsock_tx_work from starting new transmits and
+ * wait for any in-progress work to finish. This must happen
+ * before the socket is orphaned to avoid a race where
+ * rawsock_tx_work runs after the NCI device has been freed.
+ */
+ sk->sk_shutdown |= SEND_SHUTDOWN;
+ cancel_work_sync(&nfc_rawsock(sk)->tx_work);
+ rawsock_write_queue_purge(sk);
+ }
+
sock_orphan(sk);
sock_put(sk);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 04f310255692..654e23d13e3d 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -490,18 +490,24 @@ bool rds_tcp_tune(struct socket *sock)
struct rds_tcp_net *rtn;
tcp_sock_set_nodelay(sock->sk);
- lock_sock(sk);
/* TCP timer functions might access net namespace even after
* a process which created this net namespace terminated.
*/
if (!sk->sk_net_refcnt) {
- if (!maybe_get_net(net)) {
- release_sock(sk);
+ if (!maybe_get_net(net))
return false;
- }
+ /*
+ * sk_net_refcnt_upgrade() must be called before lock_sock()
+ * because it does a GFP_KERNEL allocation, which can trigger
+ * fs_reclaim and create a circular lock dependency with the
+ * socket lock. The fields it modifies (sk_net_refcnt,
+ * ns_tracker) are not accessed by any concurrent code path
+ * at this point.
+ */
sk_net_refcnt_upgrade(sk);
put_net(net);
}
+ lock_sock(sk);
rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index bd51522c7953..7d5e50c921a0 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1360,6 +1360,12 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
+ if (bind && !(flags & TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Attaching ct to a non ingress/clsact qdisc is unsupported");
+ return -EOPNOTSUPP;
+ }
+
err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
if (err < 0)
return err;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 686eaed81b81..fdbfcaa3e2ab 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -32,9 +32,12 @@ static ktime_t gate_get_time(struct tcf_gate *gact)
return KTIME_MAX;
}
-static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void tcf_gate_params_free_rcu(struct rcu_head *head);
+
+static void gate_get_start_time(struct tcf_gate *gact,
+ const struct tcf_gate_params *param,
+ ktime_t *start)
{
- struct tcf_gate_params *param = &gact->param;
ktime_t now, base, cycle;
u64 n;
@@ -69,12 +72,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
{
struct tcf_gate *gact = container_of(timer, struct tcf_gate,
hitimer);
- struct tcf_gate_params *p = &gact->param;
struct tcfg_gate_entry *next;
+ struct tcf_gate_params *p;
ktime_t close_time, now;
spin_lock(&gact->tcf_lock);
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
next = gact->next_entry;
/* cycle start, clear pending bit, clear total octets */
@@ -225,6 +230,35 @@ static void release_entry_list(struct list_head *entries)
}
}
+static int tcf_gate_copy_entries(struct tcf_gate_params *dst,
+ const struct tcf_gate_params *src,
+ struct netlink_ext_ack *extack)
+{
+ struct tcfg_gate_entry *entry;
+ int i = 0;
+
+ list_for_each_entry(entry, &src->entries, list) {
+ struct tcfg_gate_entry *new;
+
+ new = kzalloc(sizeof(*new), GFP_ATOMIC);
+ if (!new) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+ return -ENOMEM;
+ }
+
+ new->index = entry->index;
+ new->gate_state = entry->gate_state;
+ new->interval = entry->interval;
+ new->ipv = entry->ipv;
+ new->maxoctets = entry->maxoctets;
+ list_add_tail(&new->list, &dst->entries);
+ i++;
+ }
+
+ dst->num_entries = i;
+ return 0;
+}
+
static int parse_gate_list(struct nlattr *list_attr,
struct tcf_gate_params *sched,
struct netlink_ext_ack *extack)
@@ -270,24 +304,44 @@ release_list:
return err;
}
-static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
- enum tk_offsets tko, s32 clockid,
- bool do_init)
+static bool gate_timer_needs_cancel(u64 basetime, u64 old_basetime,
+ enum tk_offsets tko,
+ enum tk_offsets old_tko,
+ s32 clockid, s32 old_clockid)
{
- if (!do_init) {
- if (basetime == gact->param.tcfg_basetime &&
- tko == gact->tk_offset &&
- clockid == gact->param.tcfg_clockid)
- return;
+ return basetime != old_basetime ||
+ clockid != old_clockid ||
+ tko != old_tko;
+}
- spin_unlock_bh(&gact->tcf_lock);
- hrtimer_cancel(&gact->hitimer);
- spin_lock_bh(&gact->tcf_lock);
+static int gate_clock_resolve(s32 clockid, enum tk_offsets *tko,
+ struct netlink_ext_ack *extack)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ *tko = TK_OFFS_REAL;
+ return 0;
+ case CLOCK_MONOTONIC:
+ *tko = TK_OFFS_MAX;
+ return 0;
+ case CLOCK_BOOTTIME:
+ *tko = TK_OFFS_BOOT;
+ return 0;
+ case CLOCK_TAI:
+ *tko = TK_OFFS_TAI;
+ return 0;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ return -EINVAL;
}
- gact->param.tcfg_basetime = basetime;
- gact->param.tcfg_clockid = clockid;
- gact->tk_offset = tko;
- hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT);
+}
+
+static void gate_setup_timer(struct tcf_gate *gact, s32 clockid,
+ enum tk_offsets tko)
+{
+ WRITE_ONCE(gact->tk_offset, tko);
+ hrtimer_setup(&gact->hitimer, gate_timer_func, clockid,
+ HRTIMER_MODE_ABS_SOFT);
}
static int tcf_gate_init(struct net *net, struct nlattr *nla,
@@ -296,15 +350,22 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
- enum tk_offsets tk_offset = TK_OFFS_TAI;
+ u64 cycletime = 0, basetime = 0, cycletime_ext = 0;
+ struct tcf_gate_params *p = NULL, *old_p = NULL;
+ enum tk_offsets old_tk_offset = TK_OFFS_TAI;
+ const struct tcf_gate_params *cur_p = NULL;
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GATE_MAX + 1];
+ enum tk_offsets tko = TK_OFFS_TAI;
struct tcf_chain *goto_ch = NULL;
- u64 cycletime = 0, basetime = 0;
- struct tcf_gate_params *p;
+ s32 timer_clockid = CLOCK_TAI;
+ bool use_old_entries = false;
+ s32 old_clockid = CLOCK_TAI;
+ bool need_cancel = false;
s32 clockid = CLOCK_TAI;
struct tcf_gate *gact;
struct tc_gate *parm;
+ u64 old_basetime = 0;
int ret = 0, err;
u32 gflags = 0;
s32 prio = -1;
@@ -321,26 +382,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_GATE_PARMS])
return -EINVAL;
- if (tb[TCA_GATE_CLOCKID]) {
+ if (tb[TCA_GATE_CLOCKID])
clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
- switch (clockid) {
- case CLOCK_REALTIME:
- tk_offset = TK_OFFS_REAL;
- break;
- case CLOCK_MONOTONIC:
- tk_offset = TK_OFFS_MAX;
- break;
- case CLOCK_BOOTTIME:
- tk_offset = TK_OFFS_BOOT;
- break;
- case CLOCK_TAI:
- tk_offset = TK_OFFS_TAI;
- break;
- default:
- NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
- return -EINVAL;
- }
- }
parm = nla_data(tb[TCA_GATE_PARMS]);
index = parm->index;
@@ -366,6 +409,60 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
+ gact = to_gate(*a);
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ err = -ENOMEM;
+ goto chain_put;
+ }
+ INIT_LIST_HEAD(&p->entries);
+
+ use_old_entries = !tb[TCA_GATE_ENTRY_LIST];
+ if (!use_old_entries) {
+ err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+ if (err < 0)
+ goto err_free;
+ use_old_entries = !err;
+ }
+
+ if (ret == ACT_P_CREATED && use_old_entries) {
+ NL_SET_ERR_MSG(extack, "The entry list is empty");
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ if (ret != ACT_P_CREATED) {
+ rcu_read_lock();
+ cur_p = rcu_dereference(gact->param);
+
+ old_basetime = cur_p->tcfg_basetime;
+ old_clockid = cur_p->tcfg_clockid;
+ old_tk_offset = READ_ONCE(gact->tk_offset);
+
+ basetime = old_basetime;
+ cycletime_ext = cur_p->tcfg_cycletime_ext;
+ prio = cur_p->tcfg_priority;
+ gflags = cur_p->tcfg_flags;
+
+ if (!tb[TCA_GATE_CLOCKID])
+ clockid = old_clockid;
+
+ err = 0;
+ if (use_old_entries) {
+ err = tcf_gate_copy_entries(p, cur_p, extack);
+ if (!err && !tb[TCA_GATE_CYCLE_TIME])
+ cycletime = cur_p->tcfg_cycletime;
+ }
+ rcu_read_unlock();
+ if (err)
+ goto err_free;
+ }
+
if (tb[TCA_GATE_PRIORITY])
prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
@@ -375,25 +472,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (tb[TCA_GATE_FLAGS])
gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
- gact = to_gate(*a);
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&gact->param.entries);
+ if (tb[TCA_GATE_CYCLE_TIME])
+ cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
- if (err < 0)
- goto release_idr;
+ if (tb[TCA_GATE_CYCLE_TIME_EXT])
+ cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
- spin_lock_bh(&gact->tcf_lock);
- p = &gact->param;
+ err = gate_clock_resolve(clockid, &tko, extack);
+ if (err)
+ goto err_free;
+ timer_clockid = clockid;
- if (tb[TCA_GATE_CYCLE_TIME])
- cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+ need_cancel = ret != ACT_P_CREATED &&
+ gate_timer_needs_cancel(basetime, old_basetime,
+ tko, old_tk_offset,
+ timer_clockid, old_clockid);
- if (tb[TCA_GATE_ENTRY_LIST]) {
- err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
- if (err < 0)
- goto chain_put;
- }
+ if (need_cancel)
+ hrtimer_cancel(&gact->hitimer);
+
+ spin_lock_bh(&gact->tcf_lock);
if (!cycletime) {
struct tcfg_gate_entry *entry;
@@ -402,22 +500,20 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
list_for_each_entry(entry, &p->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
cycletime = cycle;
- if (!cycletime) {
- err = -EINVAL;
- goto chain_put;
- }
}
p->tcfg_cycletime = cycletime;
+ p->tcfg_cycletime_ext = cycletime_ext;
- if (tb[TCA_GATE_CYCLE_TIME_EXT])
- p->tcfg_cycletime_ext =
- nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
-
- gate_setup_timer(gact, basetime, tk_offset, clockid,
- ret == ACT_P_CREATED);
+ if (need_cancel || ret == ACT_P_CREATED)
+ gate_setup_timer(gact, timer_clockid, tko);
p->tcfg_priority = prio;
p->tcfg_flags = gflags;
- gate_get_start_time(gact, &start);
+ p->tcfg_basetime = basetime;
+ p->tcfg_clockid = timer_clockid;
+ gate_get_start_time(gact, p, &start);
+
+ old_p = rcu_replace_pointer(gact->param, p,
+ lockdep_is_held(&gact->tcf_lock));
gact->current_close_time = start;
gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
@@ -434,11 +530,15 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+ if (old_p)
+ call_rcu(&old_p->rcu, tcf_gate_params_free_rcu);
+
return ret;
+err_free:
+ release_entry_list(&p->entries);
+ kfree(p);
chain_put:
- spin_unlock_bh(&gact->tcf_lock);
-
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
@@ -446,21 +546,29 @@ release_idr:
* without taking tcf_lock.
*/
if (ret == ACT_P_CREATED)
- gate_setup_timer(gact, gact->param.tcfg_basetime,
- gact->tk_offset, gact->param.tcfg_clockid,
- true);
+ gate_setup_timer(gact, timer_clockid, tko);
+
tcf_idr_release(*a, bind);
return err;
}
+static void tcf_gate_params_free_rcu(struct rcu_head *head)
+{
+ struct tcf_gate_params *p = container_of(head, struct tcf_gate_params, rcu);
+
+ release_entry_list(&p->entries);
+ kfree(p);
+}
+
static void tcf_gate_cleanup(struct tc_action *a)
{
struct tcf_gate *gact = to_gate(a);
struct tcf_gate_params *p;
- p = &gact->param;
hrtimer_cancel(&gact->hitimer);
- release_entry_list(&p->entries);
+ p = rcu_dereference_protected(gact->param, 1);
+ if (p)
+ call_rcu(&p->rcu, tcf_gate_params_free_rcu);
}
static int dumping_entry(struct sk_buff *skb,
@@ -509,10 +617,9 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
struct nlattr *entry_list;
struct tcf_t t;
- spin_lock_bh(&gact->tcf_lock);
- opt.action = gact->tcf_action;
-
- p = &gact->param;
+ rcu_read_lock();
+ opt.action = READ_ONCE(gact->tcf_action);
+ p = rcu_dereference(gact->param);
if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -552,12 +659,12 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
tcf_tm_dump(&t, &gact->tcf_tm);
if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD))
goto nla_put_failure;
- spin_unlock_bh(&gact->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&gact->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 79df81d12894..d5e8a91bb4eb 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -293,8 +293,8 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held)
/* called when adding new meta information
*/
static int __add_metainfo(const struct tcf_meta_ops *ops,
- struct tcf_ife_info *ife, u32 metaid, void *metaval,
- int len, bool atomic, bool exists)
+ struct tcf_ife_params *p, u32 metaid, void *metaval,
+ int len, bool atomic)
{
struct tcf_meta_info *mi = NULL;
int ret = 0;
@@ -313,45 +313,40 @@ static int __add_metainfo(const struct tcf_meta_ops *ops,
}
}
- if (exists)
- spin_lock_bh(&ife->tcf_lock);
- list_add_tail(&mi->metalist, &ife->metalist);
- if (exists)
- spin_unlock_bh(&ife->tcf_lock);
+ list_add_tail(&mi->metalist, &p->metalist);
return ret;
}
static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops,
- struct tcf_ife_info *ife, u32 metaid,
- bool exists)
+ struct tcf_ife_params *p, u32 metaid)
{
int ret;
if (!try_module_get(ops->owner))
return -ENOENT;
- ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists);
+ ret = __add_metainfo(ops, p, metaid, NULL, 0, true);
if (ret)
module_put(ops->owner);
return ret;
}
-static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
- int len, bool exists)
+static int add_metainfo(struct tcf_ife_params *p, u32 metaid, void *metaval,
+ int len)
{
const struct tcf_meta_ops *ops = find_ife_oplist(metaid);
int ret;
if (!ops)
return -ENOENT;
- ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists);
+ ret = __add_metainfo(ops, p, metaid, metaval, len, false);
if (ret)
/*put back what find_ife_oplist took */
module_put(ops->owner);
return ret;
}
-static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
+static int use_all_metadata(struct tcf_ife_params *p)
{
struct tcf_meta_ops *o;
int rc = 0;
@@ -359,7 +354,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
- rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists);
+ rc = add_metainfo_and_get_ops(o, p, o->metaid);
if (rc == 0)
installed += 1;
}
@@ -371,7 +366,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
return -EINVAL;
}
-static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
+static int dump_metalist(struct sk_buff *skb, struct tcf_ife_params *p)
{
struct tcf_meta_info *e;
struct nlattr *nest;
@@ -379,14 +374,14 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
int total_encoded = 0;
/*can only happen on decode */
- if (list_empty(&ife->metalist))
+ if (list_empty(&p->metalist))
return 0;
nest = nla_nest_start_noflag(skb, TCA_IFE_METALST);
if (!nest)
goto out_nlmsg_trim;
- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry(e, &p->metalist, metalist) {
if (!e->ops->get(skb, e))
total_encoded += 1;
}
@@ -403,13 +398,11 @@ out_nlmsg_trim:
return -1;
}
-/* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a)
+static void __tcf_ife_cleanup(struct tcf_ife_params *p)
{
- struct tcf_ife_info *ife = to_ife(a);
struct tcf_meta_info *e, *n;
- list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
+ list_for_each_entry_safe(e, n, &p->metalist, metalist) {
list_del(&e->metalist);
if (e->metaval) {
if (e->ops->release)
@@ -422,18 +415,23 @@ static void _tcf_ife_cleanup(struct tc_action *a)
}
}
+static void tcf_ife_cleanup_params(struct rcu_head *head)
+{
+ struct tcf_ife_params *p = container_of(head, struct tcf_ife_params,
+ rcu);
+
+ __tcf_ife_cleanup(p);
+ kfree(p);
+}
+
static void tcf_ife_cleanup(struct tc_action *a)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_ife_params *p;
- spin_lock_bh(&ife->tcf_lock);
- _tcf_ife_cleanup(a);
- spin_unlock_bh(&ife->tcf_lock);
-
p = rcu_dereference_protected(ife->params, 1);
if (p)
- kfree_rcu(p, rcu);
+ call_rcu(&p->rcu, tcf_ife_cleanup_params);
}
static int load_metalist(struct nlattr **tb, bool rtnl_held)
@@ -455,8 +453,7 @@ static int load_metalist(struct nlattr **tb, bool rtnl_held)
return 0;
}
-static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
- bool exists, bool rtnl_held)
+static int populate_metalist(struct tcf_ife_params *p, struct nlattr **tb)
{
int len = 0;
int rc = 0;
@@ -468,7 +465,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
val = nla_data(tb[i]);
len = nla_len(tb[i]);
- rc = add_metainfo(ife, i, val, len, exists);
+ rc = add_metainfo(p, i, val, len);
if (rc)
return rc;
}
@@ -523,6 +520,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
p = kzalloc_obj(*p);
if (!p)
return -ENOMEM;
+ INIT_LIST_HEAD(&p->metalist);
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
@@ -567,8 +565,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
ife = to_ife(*a);
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&ife->metalist);
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
@@ -600,8 +596,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
if (tb[TCA_IFE_METALST]) {
- err = populate_metalist(ife, tb2, exists,
- !(flags & TCA_ACT_FLAGS_NO_RTNL));
+ err = populate_metalist(p, tb2);
if (err)
goto metadata_parse_err;
} else {
@@ -610,7 +605,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
* as we can. You better have at least one else we are
* going to bail out
*/
- err = use_all_metadata(ife, exists);
+ err = use_all_metadata(p);
if (err)
goto metadata_parse_err;
}
@@ -626,13 +621,14 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (p)
- kfree_rcu(p, rcu);
+ call_rcu(&p->rcu, tcf_ife_cleanup_params);
return ret;
metadata_parse_err:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
+ __tcf_ife_cleanup(p);
kfree(p);
tcf_idr_release(*a, bind);
return err;
@@ -679,7 +675,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
goto nla_put_failure;
- if (dump_metalist(skb, ife)) {
+ if (dump_metalist(skb, p)) {
/*ignore failure to dump metalist */
pr_info("Failed to dump metalist\n");
}
@@ -693,13 +689,13 @@ nla_put_failure:
return -1;
}
-static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
+static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_params *p,
u16 metaid, u16 mlen, void *mdata)
{
struct tcf_meta_info *e;
/* XXX: use hash to speed up */
- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (metaid == e->metaid) {
if (e->ops) {
/* We check for decode presence already */
@@ -716,10 +712,13 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
{
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
+ struct tcf_ife_params *p;
u8 *ifehdr_end;
u8 *tlv_data;
u16 metalen;
+ p = rcu_dereference_bh(ife->params);
+
bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
@@ -745,7 +744,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
return TC_ACT_SHOT;
}
- if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
+ if (find_decode_metaid(skb, p, mtype, dlen, curr_data)) {
/* abuse overlimits to count when we receive metadata
* but dont have an ops for it
*/
@@ -769,12 +768,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
/*XXX: check if we can do this at install time instead of current
* send data path
**/
-static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
+static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_params *p)
{
- struct tcf_meta_info *e, *n;
+ struct tcf_meta_info *e;
int tot_run_sz = 0, run_sz = 0;
- list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (e->ops->check_presence) {
run_sz = e->ops->check_presence(skb, e);
tot_run_sz += run_sz;
@@ -795,7 +794,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA
where ORIGDATA = original ethernet header ...
*/
- u16 metalen = ife_get_sz(skb, ife);
+ u16 metalen = ife_get_sz(skb, p);
int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
unsigned int skboff = 0;
int new_len = skb->len + hdrm;
@@ -833,25 +832,21 @@ drop:
if (!ife_meta)
goto drop;
- spin_lock(&ife->tcf_lock);
-
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
* ops->presence_check...
*/
- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (e->ops->encode) {
err = e->ops->encode(skb, (void *)(ife_meta + skboff),
e);
}
if (err < 0) {
/* too corrupt to keep around if overwritten */
- spin_unlock(&ife->tcf_lock);
goto drop;
}
skboff += err;
}
- spin_unlock(&ife->tcf_lock);
oethh = (struct ethhdr *)skb->data;
if (!is_zero_ether_addr(p->eth_src))
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 343309e9009b..4829c27446e3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2228,6 +2228,11 @@ static bool is_qdisc_ingress(__u32 classid)
return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS));
}
+static bool is_ingress_or_clsact(struct tcf_block *block, struct Qdisc *q)
+{
+ return tcf_block_shared(block) || (q && !!(q->flags & TCQ_F_INGRESS));
+}
+
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -2420,6 +2425,8 @@ replay:
flags |= TCA_ACT_FLAGS_NO_RTNL;
if (is_qdisc_ingress(parent))
flags |= TCA_ACT_FLAGS_AT_INGRESS;
+ if (is_ingress_or_clsact(block, q))
+ flags |= TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT;
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
flags, extack);
if (err == 0) {
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index a64d27476231..ffea9fbd522d 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -391,8 +391,8 @@ static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
1239850263, 1191209601, 1147878294, 1108955788
};
-static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
- u64 target_ns, u64 rtt_est_ns);
+static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust);
+
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
*
@@ -2013,7 +2013,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
u64 delay;
u32 len;
- if (q->config->is_shared && now - q->last_checked_active >= q->config->sync_time) {
+ if (q->config->is_shared && q->rate_ns &&
+ now - q->last_checked_active >= q->config->sync_time) {
struct net_device *dev = qdisc_dev(sch);
struct cake_sched_data *other_priv;
u64 new_rate = q->config->rate_bps;
@@ -2039,12 +2040,9 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
if (num_active_qs > 1)
new_rate = div64_u64(q->config->rate_bps, num_active_qs);
- /* mtu = 0 is used to only update the rate and not mess with cobalt params */
- cake_set_rate(b, new_rate, 0, 0, 0);
+ cake_configure_rates(sch, new_rate, true);
q->last_checked_active = now;
q->active_queues = num_active_qs;
- q->rate_ns = b->tin_rate_ns;
- q->rate_shft = b->tin_rate_shft;
}
begin:
@@ -2361,12 +2359,10 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
b->cparams.p_dec = 1 << 20; /* 1/4096 */
}
-static int cake_config_besteffort(struct Qdisc *sch)
+static int cake_config_besteffort(struct Qdisc *sch, u64 rate, u32 mtu)
{
struct cake_sched_data *q = qdisc_priv(sch);
struct cake_tin_data *b = &q->tins[0];
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
q->tin_cnt = 1;
@@ -2380,12 +2376,10 @@ static int cake_config_besteffort(struct Qdisc *sch)
return 0;
}
-static int cake_config_precedence(struct Qdisc *sch)
+static int cake_config_precedence(struct Qdisc *sch, u64 rate, u32 mtu)
{
/* convert high-level (user visible) parameters into internal format */
struct cake_sched_data *q = qdisc_priv(sch);
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
u32 quantum = 256;
u32 i;
@@ -2456,7 +2450,7 @@ static int cake_config_precedence(struct Qdisc *sch)
* Total 12 traffic classes.
*/
-static int cake_config_diffserv8(struct Qdisc *sch)
+static int cake_config_diffserv8(struct Qdisc *sch, u64 rate, u32 mtu)
{
/* Pruned list of traffic classes for typical applications:
*
@@ -2473,8 +2467,6 @@ static int cake_config_diffserv8(struct Qdisc *sch)
*/
struct cake_sched_data *q = qdisc_priv(sch);
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
u32 quantum = 256;
u32 i;
@@ -2504,7 +2496,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
return 0;
}
-static int cake_config_diffserv4(struct Qdisc *sch)
+static int cake_config_diffserv4(struct Qdisc *sch, u64 rate, u32 mtu)
{
/* Further pruned list of traffic classes for four-class system:
*
@@ -2517,8 +2509,6 @@ static int cake_config_diffserv4(struct Qdisc *sch)
*/
struct cake_sched_data *q = qdisc_priv(sch);
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
u32 quantum = 1024;
q->tin_cnt = 4;
@@ -2546,7 +2536,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
return 0;
}
-static int cake_config_diffserv3(struct Qdisc *sch)
+static int cake_config_diffserv3(struct Qdisc *sch, u64 rate, u32 mtu)
{
/* Simplified Diffserv structure with 3 tins.
* Latency Sensitive (CS7, CS6, EF, VA, TOS4)
@@ -2554,8 +2544,6 @@ static int cake_config_diffserv3(struct Qdisc *sch)
* Low Priority (LE, CS1)
*/
struct cake_sched_data *q = qdisc_priv(sch);
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
u32 quantum = 1024;
q->tin_cnt = 3;
@@ -2580,32 +2568,33 @@ static int cake_config_diffserv3(struct Qdisc *sch)
return 0;
}
-static void cake_reconfigure(struct Qdisc *sch)
+static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust)
{
+ u32 mtu = likely(rate_adjust) ? 0 : psched_mtu(qdisc_dev(sch));
struct cake_sched_data *qd = qdisc_priv(sch);
struct cake_sched_config *q = qd->config;
int c, ft;
switch (q->tin_mode) {
case CAKE_DIFFSERV_BESTEFFORT:
- ft = cake_config_besteffort(sch);
+ ft = cake_config_besteffort(sch, rate, mtu);
break;
case CAKE_DIFFSERV_PRECEDENCE:
- ft = cake_config_precedence(sch);
+ ft = cake_config_precedence(sch, rate, mtu);
break;
case CAKE_DIFFSERV_DIFFSERV8:
- ft = cake_config_diffserv8(sch);
+ ft = cake_config_diffserv8(sch, rate, mtu);
break;
case CAKE_DIFFSERV_DIFFSERV4:
- ft = cake_config_diffserv4(sch);
+ ft = cake_config_diffserv4(sch, rate, mtu);
break;
case CAKE_DIFFSERV_DIFFSERV3:
default:
- ft = cake_config_diffserv3(sch);
+ ft = cake_config_diffserv3(sch, rate, mtu);
break;
}
@@ -2616,6 +2605,14 @@ static void cake_reconfigure(struct Qdisc *sch)
qd->rate_ns = qd->tins[ft].tin_rate_ns;
qd->rate_shft = qd->tins[ft].tin_rate_shft;
+}
+
+static void cake_reconfigure(struct Qdisc *sch)
+{
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
+
+ cake_configure_rates(sch, qd->config->rate_bps, false);
if (q->buffer_config_limit) {
qd->buffer_limit = q->buffer_config_limit;
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 306e046276d4..a4b07b661b77 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -115,12 +115,12 @@ static void ets_offload_change(struct Qdisc *sch)
struct ets_sched *q = qdisc_priv(sch);
struct tc_ets_qopt_offload qopt;
unsigned int w_psum_prev = 0;
- unsigned int q_psum = 0;
- unsigned int q_sum = 0;
unsigned int quantum;
unsigned int w_psum;
unsigned int weight;
unsigned int i;
+ u64 q_psum = 0;
+ u64 q_sum = 0;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
@@ -138,8 +138,12 @@ static void ets_offload_change(struct Qdisc *sch)
for (i = 0; i < q->nbands; i++) {
quantum = q->classes[i].quantum;
- q_psum += quantum;
- w_psum = quantum ? q_psum * 100 / q_sum : 0;
+ if (quantum) {
+ q_psum += quantum;
+ w_psum = div64_u64(q_psum * 100, q_sum);
+ } else {
+ w_psum = 0;
+ }
weight = w_psum - w_psum_prev;
w_psum_prev = w_psum;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 9a550f832d78..f2edcf872981 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -823,6 +823,7 @@ static void fq_reset(struct Qdisc *sch)
for (idx = 0; idx < FQ_BANDS; idx++) {
q->band_flows[idx].new_flows.first = NULL;
q->band_flows[idx].old_flows.first = NULL;
+ q->band_pkt_count[idx] = 0;
}
q->delayed = RB_ROOT;
q->flows = 0;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3a7e00c063c3..cd83d11bc8e4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1785,7 +1785,7 @@ restart:
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
- other->sk_data_ready(other);
+ READ_ONCE(other->sk_data_ready)(other);
sock_put(other);
return 0;
@@ -2278,7 +2278,7 @@ restart_locked:
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
unix_state_unlock(other);
- other->sk_data_ready(other);
+ READ_ONCE(other->sk_data_ready)(other);
sock_put(other);
scm_destroy(&scm);
return len;
@@ -2351,7 +2351,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
sk_send_sigurg(other);
unix_state_unlock(other);
- other->sk_data_ready(other);
+ READ_ONCE(other->sk_data_ready)(other);
return 0;
out_unlock:
@@ -2477,7 +2477,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
- other->sk_data_ready(other);
+ READ_ONCE(other->sk_data_ready)(other);
sent += size;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3b46bc635c43..6149f6a79897 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -167,26 +167,32 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
struct xdp_buff_xsk *pos, *tmp;
struct list_head *xskb_list;
u32 contd = 0;
+ u32 num_desc;
int err;
- if (frags)
- contd = XDP_PKT_CONTD;
+ if (likely(!frags)) {
+ err = __xsk_rcv_zc(xs, xskb, len, contd);
+ if (err)
+ goto err;
+ return 0;
+ }
- err = __xsk_rcv_zc(xs, xskb, len, contd);
- if (err)
+ contd = XDP_PKT_CONTD;
+ num_desc = xdp_get_shared_info_from_buff(xdp)->nr_frags + 1;
+ if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
+ xs->rx_queue_full++;
+ err = -ENOBUFS;
goto err;
- if (likely(!frags))
- return 0;
+ }
+ __xsk_rcv_zc(xs, xskb, len, contd);
xskb_list = &xskb->pool->xskb_list;
list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
if (list_is_singular(xskb_list))
contd = 0;
len = pos->xdp.data_end - pos->xdp.data;
- err = __xsk_rcv_zc(xs, pos, len, contd);
- if (err)
- goto err;
- list_del(&pos->list_node);
+ __xsk_rcv_zc(xs, pos, len, contd);
+ list_del_init(&pos->list_node);
}
return 0;
diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs
index c1cca7b438c3..e5fba6bf6db0 100644
--- a/rust/kernel/io.rs
+++ b/rust/kernel/io.rs
@@ -139,9 +139,9 @@ pub struct Mmio<const SIZE: usize = 0>(MmioRaw<SIZE>);
/// Internal helper macros used to invoke C MMIO read functions.
///
-/// This macro is intended to be used by higher-level MMIO access macros (define_read) and provides
-/// a unified expansion for infallible vs. fallible read semantics. It emits a direct call into the
-/// corresponding C helper and performs the required cast to the Rust return type.
+/// This macro is intended to be used by higher-level MMIO access macros (io_define_read) and
+/// provides a unified expansion for infallible vs. fallible read semantics. It emits a direct call
+/// into the corresponding C helper and performs the required cast to the Rust return type.
///
/// # Parameters
///
@@ -166,9 +166,9 @@ macro_rules! call_mmio_read {
/// Internal helper macros used to invoke C MMIO write functions.
///
-/// This macro is intended to be used by higher-level MMIO access macros (define_write) and provides
-/// a unified expansion for infallible vs. fallible write semantics. It emits a direct call into the
-/// corresponding C helper and performs the required cast to the Rust return type.
+/// This macro is intended to be used by higher-level MMIO access macros (io_define_write) and
+/// provides a unified expansion for infallible vs. fallible write semantics. It emits a direct call
+/// into the corresponding C helper and performs the required cast to the Rust return type.
///
/// # Parameters
///
@@ -193,7 +193,30 @@ macro_rules! call_mmio_write {
}};
}
-macro_rules! define_read {
+/// Generates an accessor method for reading from an I/O backend.
+///
+/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked
+/// (infallible) or runtime bounds-checked (fallible) read methods. It abstracts the address
+/// calculation and bounds checking, and delegates the actual I/O read operation to a specified
+/// helper macro, making it generic over different I/O backends.
+///
+/// # Parameters
+///
+/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on
+/// `IoKnownSize` for compile-time checks and returns the value directly. `fallible` performs
+/// runtime checks against `maxsize()` and returns a `Result<T>`.
+/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g.,
+/// `#[cfg(CONFIG_64BIT)]` or inline directives).
+/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`).
+/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `read32`,
+/// `try_read8`).
+/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call
+/// (e.g., `call_mmio_read`).
+/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the
+/// `$call_macro`.
+/// * `$type_name:ty` - The Rust type of the value being read (e.g., `u8`, `u32`).
+#[macro_export]
+macro_rules! io_define_read {
(infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) ->
$type_name:ty) => {
/// Read IO data from a given offset known at compile time.
@@ -226,9 +249,33 @@ macro_rules! define_read {
}
};
}
-pub(crate) use define_read;
+pub use io_define_read;
-macro_rules! define_write {
+/// Generates an accessor method for writing to an I/O backend.
+///
+/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked
+/// (infallible) or runtime bounds-checked (fallible) write methods. It abstracts the address
+/// calculation and bounds checking, and delegates the actual I/O write operation to a specified
+/// helper macro, making it generic over different I/O backends.
+///
+/// # Parameters
+///
+/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on
+/// `IoKnownSize` for compile-time checks and returns `()`. `fallible` performs runtime checks
+/// against `maxsize()` and returns a `Result`.
+/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g.,
+/// `#[cfg(CONFIG_64BIT)]` or inline directives).
+/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`).
+/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `write32`,
+/// `try_write8`).
+/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call
+/// (e.g., `call_mmio_write`).
+/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the
+/// `$call_macro`.
+/// * `$type_name:ty` - The Rust type of the value being written (e.g., `u8`, `u32`). Note the use
+/// of `<-` before the type to denote a write operation.
+#[macro_export]
+macro_rules! io_define_write {
(infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) <-
$type_name:ty) => {
/// Write IO data from a given offset known at compile time.
@@ -259,7 +306,7 @@ macro_rules! define_write {
}
};
}
-pub(crate) use define_write;
+pub use io_define_write;
/// Checks whether an access of type `U` at the given `offset`
/// is valid within this region.
@@ -509,40 +556,40 @@ impl<const SIZE: usize> Io for Mmio<SIZE> {
self.0.maxsize()
}
- define_read!(fallible, try_read8, call_mmio_read(readb) -> u8);
- define_read!(fallible, try_read16, call_mmio_read(readw) -> u16);
- define_read!(fallible, try_read32, call_mmio_read(readl) -> u32);
- define_read!(
+ io_define_read!(fallible, try_read8, call_mmio_read(readb) -> u8);
+ io_define_read!(fallible, try_read16, call_mmio_read(readw) -> u16);
+ io_define_read!(fallible, try_read32, call_mmio_read(readl) -> u32);
+ io_define_read!(
fallible,
#[cfg(CONFIG_64BIT)]
try_read64,
call_mmio_read(readq) -> u64
);
- define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8);
- define_write!(fallible, try_write16, call_mmio_write(writew) <- u16);
- define_write!(fallible, try_write32, call_mmio_write(writel) <- u32);
- define_write!(
+ io_define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8);
+ io_define_write!(fallible, try_write16, call_mmio_write(writew) <- u16);
+ io_define_write!(fallible, try_write32, call_mmio_write(writel) <- u32);
+ io_define_write!(
fallible,
#[cfg(CONFIG_64BIT)]
try_write64,
call_mmio_write(writeq) <- u64
);
- define_read!(infallible, read8, call_mmio_read(readb) -> u8);
- define_read!(infallible, read16, call_mmio_read(readw) -> u16);
- define_read!(infallible, read32, call_mmio_read(readl) -> u32);
- define_read!(
+ io_define_read!(infallible, read8, call_mmio_read(readb) -> u8);
+ io_define_read!(infallible, read16, call_mmio_read(readw) -> u16);
+ io_define_read!(infallible, read32, call_mmio_read(readl) -> u32);
+ io_define_read!(
infallible,
#[cfg(CONFIG_64BIT)]
read64,
call_mmio_read(readq) -> u64
);
- define_write!(infallible, write8, call_mmio_write(writeb) <- u8);
- define_write!(infallible, write16, call_mmio_write(writew) <- u16);
- define_write!(infallible, write32, call_mmio_write(writel) <- u32);
- define_write!(
+ io_define_write!(infallible, write8, call_mmio_write(writeb) <- u8);
+ io_define_write!(infallible, write16, call_mmio_write(writew) <- u16);
+ io_define_write!(infallible, write32, call_mmio_write(writel) <- u32);
+ io_define_write!(
infallible,
#[cfg(CONFIG_64BIT)]
write64,
@@ -566,40 +613,40 @@ impl<const SIZE: usize> Mmio<SIZE> {
unsafe { &*core::ptr::from_ref(raw).cast() }
}
- define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8);
- define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16);
- define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32);
- define_read!(
+ io_define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8);
+ io_define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16);
+ io_define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32);
+ io_define_read!(
infallible,
#[cfg(CONFIG_64BIT)]
pub read64_relaxed,
call_mmio_read(readq_relaxed) -> u64
);
- define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8);
- define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16);
- define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32);
- define_read!(
+ io_define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8);
+ io_define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16);
+ io_define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32);
+ io_define_read!(
fallible,
#[cfg(CONFIG_64BIT)]
pub try_read64_relaxed,
call_mmio_read(readq_relaxed) -> u64
);
- define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8);
- define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16);
- define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32);
- define_write!(
+ io_define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8);
+ io_define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16);
+ io_define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32);
+ io_define_write!(
infallible,
#[cfg(CONFIG_64BIT)]
pub write64_relaxed,
call_mmio_write(writeq_relaxed) <- u64
);
- define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8);
- define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16);
- define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32);
- define_write!(
+ io_define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8);
+ io_define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16);
+ io_define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32);
+ io_define_write!(
fallible,
#[cfg(CONFIG_64BIT)]
pub try_write64_relaxed,
diff --git a/rust/kernel/pci/io.rs b/rust/kernel/pci/io.rs
index 6ca4cf75594c..fb6edab2aea7 100644
--- a/rust/kernel/pci/io.rs
+++ b/rust/kernel/pci/io.rs
@@ -8,8 +8,8 @@ use crate::{
device,
devres::Devres,
io::{
- define_read,
- define_write,
+ io_define_read,
+ io_define_write,
Io,
IoCapable,
IoKnownSize,
@@ -88,7 +88,7 @@ pub struct ConfigSpace<'a, S: ConfigSpaceKind = Extended> {
/// Internal helper macros used to invoke C PCI configuration space read functions.
///
/// This macro is intended to be used by higher-level PCI configuration space access macros
-/// (define_read) and provides a unified expansion for infallible vs. fallible read semantics. It
+/// (io_define_read) and provides a unified expansion for infallible vs. fallible read semantics. It
/// emits a direct call into the corresponding C helper and performs the required cast to the Rust
/// return type.
///
@@ -117,9 +117,9 @@ macro_rules! call_config_read {
/// Internal helper macros used to invoke C PCI configuration space write functions.
///
/// This macro is intended to be used by higher-level PCI configuration space access macros
-/// (define_write) and provides a unified expansion for infallible vs. fallible read semantics. It
-/// emits a direct call into the corresponding C helper and performs the required cast to the Rust
-/// return type.
+/// (io_define_write) and provides a unified expansion for infallible vs. fallible read semantics.
+/// It emits a direct call into the corresponding C helper and performs the required cast to the
+/// Rust return type.
///
/// # Parameters
///
@@ -163,13 +163,13 @@ impl<'a, S: ConfigSpaceKind> Io for ConfigSpace<'a, S> {
// PCI configuration space does not support fallible operations.
// The default implementations from the Io trait are not used.
- define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8);
- define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16);
- define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32);
+ io_define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8);
+ io_define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16);
+ io_define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32);
- define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8);
- define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16);
- define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32);
+ io_define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8);
+ io_define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16);
+ io_define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32);
}
impl<'a, S: ConfigSpaceKind> IoKnownSize for ConfigSpace<'a, S> {
diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c
index 36053042ca77..86bb22d19629 100644
--- a/sound/hda/codecs/realtek/alc269.c
+++ b/sound/hda/codecs/realtek/alc269.c
@@ -1017,6 +1017,24 @@ static int alc269_resume(struct hda_codec *codec)
return 0;
}
+#define STARLABS_STARFIGHTER_SHUTUP_DELAY_MS 30
+
+static void starlabs_starfighter_shutup(struct hda_codec *codec)
+{
+ if (snd_hda_gen_shutup_speakers(codec))
+ msleep(STARLABS_STARFIGHTER_SHUTUP_DELAY_MS);
+}
+
+static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE)
+ spec->shutup = starlabs_starfighter_shutup;
+}
+
static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -4040,6 +4058,7 @@ enum {
ALC245_FIXUP_CLEVO_NOISY_MIC,
ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE,
ALC233_FIXUP_MEDION_MTL_SPK,
+ ALC233_FIXUP_STARLABS_STARFIGHTER,
ALC294_FIXUP_BASS_SPEAKER_15,
ALC283_FIXUP_DELL_HP_RESUME,
ALC294_FIXUP_ASUS_CS35L41_SPI_2,
@@ -4056,6 +4075,7 @@ enum {
ALC236_FIXUP_HP_MUTE_LED_MICMUTE_GPIO,
ALC233_FIXUP_LENOVO_GPIO2_MIC_HOTKEY,
ALC245_FIXUP_BASS_HP_DAC,
+ ALC245_FIXUP_ACER_MICMUTE_LED,
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -6499,6 +6519,10 @@ static const struct hda_fixup alc269_fixups[] = {
{ }
},
},
+ [ALC233_FIXUP_STARLABS_STARFIGHTER] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc233_fixup_starlabs_starfighter,
+ },
[ALC294_FIXUP_BASS_SPEAKER_15] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc294_fixup_bass_speaker_15,
@@ -6576,6 +6600,12 @@ static const struct hda_fixup alc269_fixups[] = {
/* Borrow the DAC routing selected for those Thinkpads */
.v.func = alc285_fixup_thinkpad_x1_gen7,
},
+ [ALC245_FIXUP_ACER_MICMUTE_LED] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_hp_coef_micmute_led,
+ .chained = true,
+ .chain_id = ALC2XX_FIXUP_HEADSET_MIC,
+ }
};
static const struct hda_quirk alc269_fixup_tbl[] = {
@@ -6591,6 +6621,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF),
+ SND_PCI_QUIRK(0x1025, 0x0943, "Acer Aspire V3-572G", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x100c, "Acer Aspire E5-574G", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK),
SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
@@ -6627,6 +6658,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x159c, "Acer Nitro 5 AN515-58", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1597, "Acer Nitro 5 AN517-55", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED),
+ SND_PCI_QUIRK(0x1025, 0x171e, "Acer Nitro ANV15-51", ALC245_FIXUP_ACER_MICMUTE_LED),
SND_PCI_QUIRK(0x1025, 0x1826, "Acer Helios ZPC", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1025, 0x182c, "Acer Helios ZPD", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1025, 0x1844, "Acer Helios ZPS", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
@@ -7311,7 +7343,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP),
- SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP),
+ SND_PCI_QUIRK(0x144d, 0xc188, "Samsung Galaxy Book Flex (NT950QCT-A38A)", ALC298_FIXUP_SAMSUNG_AMP),
+ SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Book Flex (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc1a4, "Samsung Galaxy Book Pro 360 (NT935QBD)", ALC298_FIXUP_SAMSUNG_AMP),
@@ -7651,6 +7684,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX),
SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x2782, 0x4900, "MEDION E15443", ALC233_FIXUP_MEDION_MTL_SPK),
+ SND_PCI_QUIRK(0x7017, 0x2014, "Star Labs StarFighter", ALC233_FIXUP_STARLABS_STARFIGHTER),
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
@@ -7747,6 +7781,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
{.id = ALC298_FIXUP_TPT470_DOCK_FIX, .name = "tpt470-dock-fix"},
{.id = ALC298_FIXUP_TPT470_DOCK, .name = "tpt470-dock"},
{.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"},
+ {.id = ALC233_FIXUP_STARLABS_STARFIGHTER, .name = "starlabs-starfighter"},
{.id = ALC700_FIXUP_INTEL_REFERENCE, .name = "alc700-ref"},
{.id = ALC269_FIXUP_SONY_VAIO, .name = "vaio"},
{.id = ALC269_FIXUP_DELL_M101Z, .name = "dell-m101z"},
diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c
index cfc8de2ae499..1ace4beef508 100644
--- a/sound/hda/codecs/side-codecs/cs35l56_hda.c
+++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c
@@ -249,7 +249,7 @@ static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol);
- unsigned long pos = ucontrol->value.integer.value[0];
+ long pos = ucontrol->value.integer.value[0];
bool changed;
int ret;
@@ -403,10 +403,6 @@ static void cs35l56_hda_remove_controls(struct cs35l56_hda *cs35l56)
snd_ctl_remove(cs35l56->codec->card, cs35l56->volume_ctl);
}
-static const struct cs_dsp_client_ops cs35l56_hda_client_ops = {
- /* cs_dsp requires the client to provide this even if it is empty */
-};
-
static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56,
const struct firmware **firmware, char **filename,
const char *base_name, const char *system_name,
@@ -1149,7 +1145,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
cs35l56->base.cal_index = cs35l56->index;
cs35l56_init_cs_dsp(&cs35l56->base, &cs35l56->cs_dsp);
- cs35l56->cs_dsp.client_ops = &cs35l56_hda_client_ops;
if (cs35l56->base.reset_gpio) {
dev_dbg(cs35l56->base.dev, "Hard reset\n");
diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c
index 6fddf400c4a3..3f434994c18d 100644
--- a/sound/hda/controllers/intel.c
+++ b/sound/hda/controllers/intel.c
@@ -1751,6 +1751,8 @@ static int default_bdl_pos_adj(struct azx *chip)
return 1;
case AZX_DRIVER_ZHAOXINHDMI:
return 128;
+ case AZX_DRIVER_NVIDIA:
+ return 64;
default:
return 32;
}
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index f1a63475100d..7af4daeb4c6f 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -703,6 +703,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Vivobook_ASUSLaptop M6501RR_M6501RR"),
}
},
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK BM1503CDA"),
+ }
+ },
{}
};
diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c
index b83bc4de1301..3e04e6897b14 100644
--- a/sound/soc/codecs/cs42l43-jack.c
+++ b/sound/soc/codecs/cs42l43-jack.c
@@ -699,6 +699,7 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv)
switch (type & CS42L43_HSDET_TYPE_STS_MASK) {
case 0x0: // CTIA
case 0x1: // OMTP
+ case 0x4:
return cs42l43_run_load_detect(priv, true);
case 0x2: // 3-pole
return cs42l43_run_load_detect(priv, false);
diff --git a/sound/soc/sdca/sdca_interrupts.c b/sound/soc/sdca/sdca_interrupts.c
index d9e22cf40f77..95b1ab4ba1b0 100644
--- a/sound/soc/sdca/sdca_interrupts.c
+++ b/sound/soc/sdca/sdca_interrupts.c
@@ -265,9 +265,9 @@ static int sdca_irq_request_locked(struct device *dev,
}
/**
- * sdca_request_irq - request an individual SDCA interrupt
+ * sdca_irq_request - request an individual SDCA interrupt
* @dev: Pointer to the struct device against which things should be allocated.
- * @interrupt_info: Pointer to the interrupt information structure.
+ * @info: Pointer to the interrupt information structure.
* @sdca_irq: SDCA interrupt position.
* @name: Name to be given to the IRQ.
* @handler: A callback thread function to be called for the IRQ.
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 73bce9712dbd..bf4401aba76c 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -160,8 +160,8 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep)
* This won't be used for implicit feedback which takes the packet size
* returned from the sync source
*/
-static int slave_next_packet_size(struct snd_usb_endpoint *ep,
- unsigned int avail)
+static int synced_next_packet_size(struct snd_usb_endpoint *ep,
+ unsigned int avail)
{
unsigned int phase;
int ret;
@@ -221,13 +221,14 @@ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep,
packet = ctx->packet_size[idx];
if (packet) {
+ packet = min(packet, ep->maxframesize);
if (avail && packet >= avail)
return -EAGAIN;
return packet;
}
if (ep->sync_source)
- return slave_next_packet_size(ep, avail);
+ return synced_next_packet_size(ep, avail);
else
return next_packet_size(ep, avail);
}
@@ -1378,6 +1379,9 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip,
return -EINVAL;
}
+ ep->packsize[0] = min(ep->packsize[0], ep->maxframesize);
+ ep->packsize[1] = min(ep->packsize[1], ep->maxframesize);
+
/* calculate the frequency in 16.16 format */
ep->freqm = ep->freqn;
ep->freqshift = INT_MIN;
diff --git a/sound/usb/format.c b/sound/usb/format.c
index 64cfe4a9d8cd..1207c507882a 100644
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -305,17 +305,48 @@ static bool s1810c_valid_sample_rate(struct audioformat *fp,
}
/*
- * Many Focusrite devices supports a limited set of sampling rates per
- * altsetting. Maximum rate is exposed in the last 4 bytes of Format Type
- * descriptor which has a non-standard bLength = 10.
+ * Focusrite devices use rate pairs: 44100/48000, 88200/96000, and
+ * 176400/192000. Return true if rate is in the pair for max_rate.
+ */
+static bool focusrite_rate_pair(unsigned int rate,
+ unsigned int max_rate)
+{
+ switch (max_rate) {
+ case 48000: return rate == 44100 || rate == 48000;
+ case 96000: return rate == 88200 || rate == 96000;
+ case 192000: return rate == 176400 || rate == 192000;
+ default: return true;
+ }
+}
+
+/*
+ * Focusrite devices report all supported rates in a single clock
+ * source but only a subset is valid per altsetting.
+ *
+ * Detection uses two descriptor features:
+ *
+ * 1. Format Type descriptor bLength == 10: non-standard extension
+ * with max sample rate in bytes 6..9.
+ *
+ * 2. bmControls VAL_ALT_SETTINGS readable bit: when set, the device
+ * only supports the highest rate pair for that altsetting, and when
+ * clear, all rates up to max_rate are valid.
+ *
+ * For devices without the bLength == 10 extension but with
+ * VAL_ALT_SETTINGS readable and multiple altsettings (only seen in
+ * Scarlett 18i8 3rd Gen playback), fall back to the Focusrite
+ * convention: alt 1 = 48kHz, alt 2 = 96kHz, alt 3 = 192kHz.
*/
static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip,
struct audioformat *fp,
unsigned int rate)
{
+ struct usb_interface *iface;
struct usb_host_interface *alts;
+ struct uac2_as_header_descriptor *as;
unsigned char *fmt;
unsigned int max_rate;
+ bool val_alt;
alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting);
if (!alts)
@@ -326,9 +357,21 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip,
if (!fmt)
return true;
+ as = snd_usb_find_csint_desc(alts->extra, alts->extralen,
+ NULL, UAC_AS_GENERAL);
+ if (!as)
+ return true;
+
+ val_alt = uac_v2v3_control_is_readable(as->bmControls,
+ UAC2_AS_VAL_ALT_SETTINGS);
+
if (fmt[0] == 10) { /* bLength */
max_rate = combine_quad(&fmt[6]);
+ if (val_alt)
+ return focusrite_rate_pair(rate, max_rate);
+
+ /* No val_alt: rates fall through from higher */
switch (max_rate) {
case 192000:
if (rate == 176400 || rate == 192000)
@@ -344,12 +387,29 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip,
usb_audio_info(chip,
"%u:%d : unexpected max rate: %u\n",
fp->iface, fp->altsetting, max_rate);
-
return true;
}
}
- return true;
+ if (!val_alt)
+ return true;
+
+ /* Multi-altsetting device with val_alt but no max_rate
+ * in the format descriptor. Use Focusrite convention:
+ * alt 1 = 48kHz, alt 2 = 96kHz, alt 3 = 192kHz.
+ */
+ iface = usb_ifnum_to_if(chip->dev, fp->iface);
+ if (!iface || iface->num_altsetting <= 2)
+ return true;
+
+ switch (fp->altsetting) {
+ case 1: max_rate = 48000; break;
+ case 2: max_rate = 96000; break;
+ case 3: max_rate = 192000; break;
+ default: return true;
+ }
+
+ return focusrite_rate_pair(rate, max_rate);
}
/*
diff --git a/sound/usb/mixer_s1810c.c b/sound/usb/mixer_s1810c.c
index 473cb29efa7f..7eac7d1bce64 100644
--- a/sound/usb/mixer_s1810c.c
+++ b/sound/usb/mixer_s1810c.c
@@ -71,7 +71,7 @@
* * e I guess the same as with mixer
*
*/
-/** struct s1810c_ctl_packet - basic vendor request
+/* struct s1810c_ctl_packet - basic vendor request
* @selector: device/mixer/output
* @b: request-dependant field b
* @tag: fixed value identifying type of request
@@ -94,14 +94,14 @@ struct s1810c_ctl_packet {
__le32 e;
};
-/** selectors for CMD request
+/* selectors for CMD request
*/
#define SC1810C_SEL_DEVICE 0
#define SC1810C_SEL_MIXER 0x64
#define SC1810C_SEL_OUTPUT 0x65
-/** control ids */
+/* control ids */
#define SC1810C_CTL_LINE_SW 0
#define SC1810C_CTL_MUTE_SW 1
#define SC1824C_CTL_MONO_SW 2
@@ -127,7 +127,7 @@ struct s1810c_ctl_packet {
#define SC1810C_GET_STATE_TAG SC1810C_SET_STATE_TAG
#define SC1810C_GET_STATE_LEN SC1810C_SET_STATE_LEN
-/** Mixer levels normally range from 0 (off) to 0x0100 0000 (0 dB).
+/* Mixer levels normally range from 0 (off) to 0x0100 0000 (0 dB).
* raw_level = 2^24 * 10^(db_level / 20), thus
* -3dB = 0xb53bf0 (technically, half-power -3.01...dB would be 0xb504f3)
* -96dB = 0x109
@@ -145,7 +145,7 @@ struct s1810c_ctl_packet {
#define MIXER_LEVEL_N3DB 0xb53bf0
#define MIXER_LEVEL_0DB 0x1000000
-/**
+/*
* This packet includes mixer volumes and
* various other fields, it's an extended
* version of ctl_packet, with a and b
@@ -155,7 +155,7 @@ struct s1810c_state_packet {
__le32 fields[63];
};
-/** indices into s1810c_state_packet.fields[]
+/* indices into s1810c_state_packet.fields[]
*/
#define SC1810C_STATE_TAG_IDX 2
#define SC1810C_STATE_LEN_IDX 3
diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c
index 85a0316889d4..ef3150581eab 100644
--- a/sound/usb/mixer_scarlett2.c
+++ b/sound/usb/mixer_scarlett2.c
@@ -1328,8 +1328,6 @@ struct scarlett2_data {
struct snd_kcontrol *mux_ctls[SCARLETT2_MUX_MAX];
struct snd_kcontrol *mix_ctls[SCARLETT2_MIX_MAX];
struct snd_kcontrol *compressor_ctls[SCARLETT2_COMPRESSOR_CTLS_MAX];
- struct snd_kcontrol *precomp_flt_ctls[SCARLETT2_PRECOMP_FLT_CTLS_MAX];
- struct snd_kcontrol *peq_flt_ctls[SCARLETT2_PEQ_FLT_CTLS_MAX];
struct snd_kcontrol *precomp_flt_switch_ctls[SCARLETT2_DSP_SWITCH_MAX];
struct snd_kcontrol *peq_flt_switch_ctls[SCARLETT2_DSP_SWITCH_MAX];
struct snd_kcontrol *direct_monitor_ctl;
@@ -3447,7 +3445,6 @@ static int scarlett2_update_autogain(struct usb_mixer_interface *mixer)
private->autogain_status[i] =
private->num_autogain_status_texts - 1;
-
for (i = 0; i < SCARLETT2_AG_TARGET_COUNT; i++)
if (scarlett2_has_config_item(private,
scarlett2_ag_target_configs[i])) {
@@ -5372,8 +5369,7 @@ static int scarlett2_update_filter_values(struct usb_mixer_interface *mixer)
err = scarlett2_usb_get_config(
mixer, SCARLETT2_CONFIG_PEQ_FLT_SWITCH,
- info->dsp_input_count * info->peq_flt_count,
- private->peq_flt_switch);
+ info->dsp_input_count, private->peq_flt_switch);
if (err < 0)
return err;
@@ -6546,7 +6542,7 @@ static int scarlett2_add_dsp_ctls(struct usb_mixer_interface *mixer, int i)
err = scarlett2_add_new_ctl(
mixer, &scarlett2_precomp_flt_ctl,
i * info->precomp_flt_count + j,
- 1, s, &private->precomp_flt_switch_ctls[j]);
+ 1, s, NULL);
if (err < 0)
return err;
}
@@ -6556,7 +6552,7 @@ static int scarlett2_add_dsp_ctls(struct usb_mixer_interface *mixer, int i)
err = scarlett2_add_new_ctl(
mixer, &scarlett2_peq_flt_ctl,
i * info->peq_flt_count + j,
- 1, s, &private->peq_flt_switch_ctls[j]);
+ 1, s, NULL);
if (err < 0)
return err;
}
diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c
index 01e6063c2207..510b68cced33 100644
--- a/sound/usb/qcom/qc_audio_offload.c
+++ b/sound/usb/qcom/qc_audio_offload.c
@@ -1007,7 +1007,7 @@ put_suspend:
/**
* uaudio_transfer_buffer_setup() - fetch and populate xfer buffer params
* @subs: usb substream
- * @xfer_buf: xfer buf to be allocated
+ * @xfer_buf_cpu: xfer buf to be allocated
* @xfer_buf_len: size of allocation
* @mem_info: QMI response info
*
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 4cac0dfb0094..c6a78efbcaa3 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2365,6 +2365,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x2040, 0x7281, /* Hauppauge HVR-950Q-MXL */
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
+ DEVICE_FLG(0x20b1, 0x2009, /* XMOS Ltd DIYINHK USB Audio 2.0 */
+ QUIRK_FLAG_SKIP_IMPLICIT_FB | QUIRK_FLAG_DSD_RAW),
DEVICE_FLG(0x2040, 0x8200, /* Hauppauge Woodbury */
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */
@@ -2424,7 +2426,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
VENDOR_FLG(0x07fd, /* MOTU */
QUIRK_FLAG_VALIDATE_RATES),
VENDOR_FLG(0x1235, /* Focusrite Novation */
- QUIRK_FLAG_VALIDATE_RATES),
+ QUIRK_FLAG_SKIP_CLOCK_SELECTOR |
+ QUIRK_FLAG_SKIP_IFACE_SETUP),
VENDOR_FLG(0x1511, /* AURALiC */
QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x152a, /* Thesycon devices */
@@ -2506,6 +2509,7 @@ static const char *const snd_usb_audio_quirk_flag_names[] = {
QUIRK_STRING_ENTRY(MIC_RES_384),
QUIRK_STRING_ENTRY(MIXER_PLAYBACK_MIN_MUTE),
QUIRK_STRING_ENTRY(MIXER_CAPTURE_MIN_MUTE),
+ QUIRK_STRING_ENTRY(SKIP_IFACE_SETUP),
NULL
};
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index ac4d92065dd9..d38c39e28f38 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -1259,6 +1259,9 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip,
set_iface_first = true;
/* try to set the interface... */
+ if (chip->quirk_flags & QUIRK_FLAG_SKIP_IFACE_SETUP)
+ continue;
+
usb_set_interface(chip->dev, iface_no, 0);
if (set_iface_first)
usb_set_interface(chip->dev, iface_no, altno);
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 79978cae9799..085530cf62d9 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -224,6 +224,10 @@ extern bool snd_usb_skip_validation;
* playback value represents muted state instead of minimum audible volume
* QUIRK_FLAG_MIXER_CAPTURE_MIN_MUTE
* Similar to QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE, but for capture streams
+ * QUIRK_FLAG_SKIP_IFACE_SETUP
+ * Skip the probe-time interface setup (usb_set_interface,
+ * init_pitch, init_sample_rate); redundant with
+ * snd_usb_endpoint_prepare() at stream-open time
*/
enum {
@@ -253,6 +257,7 @@ enum {
QUIRK_TYPE_MIC_RES_384 = 23,
QUIRK_TYPE_MIXER_PLAYBACK_MIN_MUTE = 24,
QUIRK_TYPE_MIXER_CAPTURE_MIN_MUTE = 25,
+ QUIRK_TYPE_SKIP_IFACE_SETUP = 26,
/* Please also edit snd_usb_audio_quirk_flag_names */
};
@@ -284,5 +289,6 @@ enum {
#define QUIRK_FLAG_MIC_RES_384 QUIRK_FLAG(MIC_RES_384)
#define QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE QUIRK_FLAG(MIXER_PLAYBACK_MIN_MUTE)
#define QUIRK_FLAG_MIXER_CAPTURE_MIN_MUTE QUIRK_FLAG(MIXER_CAPTURE_MIN_MUTE)
+#define QUIRK_FLAG_SKIP_IFACE_SETUP QUIRK_FLAG(SKIP_IFACE_SETUP)
#endif /* __USBAUDIO_H */
diff --git a/sound/usb/usx2y/us144mkii.c b/sound/usb/usx2y/us144mkii.c
index bc71968df8e2..0cf4fa74e210 100644
--- a/sound/usb/usx2y/us144mkii.c
+++ b/sound/usb/usx2y/us144mkii.c
@@ -10,8 +10,8 @@ MODULE_AUTHOR("Å erif Rami <ramiserifpersia@gmail.com>");
MODULE_DESCRIPTION("ALSA Driver for TASCAM US-144MKII");
MODULE_LICENSE("GPL");
-/**
- * @brief Module parameters for ALSA card instantiation.
+/*
+ * Module parameters for ALSA card instantiation.
*
* These parameters allow users to configure how the ALSA sound card
* for the TASCAM US-144MKII is instantiated.
@@ -269,7 +269,7 @@ void tascam_stop_work_handler(struct work_struct *work)
atomic_set(&tascam->active_urbs, 0);
}
-/**
+/*
* tascam_card_private_free() - Frees private data associated with the sound
* card.
* @card: Pointer to the ALSA sound card instance.
@@ -291,7 +291,7 @@ static void tascam_card_private_free(struct snd_card *card)
}
}
-/**
+/*
* tascam_suspend() - Handles device suspension.
* @intf: The USB interface being suspended.
* @message: Power management message.
@@ -332,7 +332,7 @@ static int tascam_suspend(struct usb_interface *intf, pm_message_t message)
return 0;
}
-/**
+/*
* tascam_resume() - Handles device resumption from suspend.
* @intf: The USB interface being resumed.
*
@@ -390,7 +390,7 @@ static void tascam_error_timer(struct timer_list *t)
schedule_work(&tascam->midi_out_work);
}
-/**
+/*
* tascam_probe() - Probes for the TASCAM US-144MKII device.
* @intf: The USB interface being probed.
* @usb_id: The USB device ID.
@@ -565,7 +565,7 @@ free_card:
return err;
}
-/**
+/*
* tascam_disconnect() - Disconnects the TASCAM US-144MKII device.
* @intf: The USB interface being disconnected.
*
diff --git a/sound/usb/usx2y/us144mkii_capture.c b/sound/usb/usx2y/us144mkii_capture.c
index 00188ff6cd51..af120bf62173 100644
--- a/sound/usb/usx2y/us144mkii_capture.c
+++ b/sound/usb/usx2y/us144mkii_capture.c
@@ -3,7 +3,7 @@
#include "us144mkii.h"
-/**
+/*
* tascam_capture_open() - Opens the PCM capture substream.
* @substream: The ALSA PCM substream to open.
*
@@ -23,7 +23,7 @@ static int tascam_capture_open(struct snd_pcm_substream *substream)
return 0;
}
-/**
+/*
* tascam_capture_close() - Closes the PCM capture substream.
* @substream: The ALSA PCM substream to close.
*
@@ -41,7 +41,7 @@ static int tascam_capture_close(struct snd_pcm_substream *substream)
return 0;
}
-/**
+/*
* tascam_capture_prepare() - Prepares the PCM capture substream for use.
* @substream: The ALSA PCM substream to prepare.
*
@@ -62,7 +62,7 @@ static int tascam_capture_prepare(struct snd_pcm_substream *substream)
return 0;
}
-/**
+/*
* tascam_capture_pointer() - Returns the current capture pointer position.
* @substream: The ALSA PCM substream.
*
@@ -91,7 +91,7 @@ tascam_capture_pointer(struct snd_pcm_substream *substream)
return do_div(pos, runtime->buffer_size);
}
-/**
+/*
* tascam_capture_ops - ALSA PCM operations for capture.
*
* This structure defines the callback functions for capture stream operations,
@@ -109,7 +109,7 @@ const struct snd_pcm_ops tascam_capture_ops = {
.pointer = tascam_capture_pointer,
};
-/**
+/*
* decode_tascam_capture_block() - Decodes a raw 512-byte block from the device.
* @src_block: Pointer to the 512-byte raw source block.
* @dst_block: Pointer to the destination buffer for decoded audio frames.
diff --git a/sound/usb/usx2y/us144mkii_controls.c b/sound/usb/usx2y/us144mkii_controls.c
index 62055fb8e7ba..81ded11e3709 100644
--- a/sound/usb/usx2y/us144mkii_controls.c
+++ b/sound/usb/usx2y/us144mkii_controls.c
@@ -3,8 +3,8 @@
#include "us144mkii.h"
-/**
- * @brief Text descriptions for playback output source options.
+/*
+ * Text descriptions for playback output source options.
*
* Used by ALSA kcontrol elements to provide user-friendly names for
* the playback routing options (e.g., "Playback 1-2", "Playback 3-4").
@@ -12,15 +12,15 @@
static const char *const playback_source_texts[] = { "Playback 1-2",
"Playback 3-4" };
-/**
- * @brief Text descriptions for capture input source options.
+/*
+ * Text descriptions for capture input source options.
*
* Used by ALSA kcontrol elements to provide user-friendly names for
* the capture routing options (e.g., "Analog In", "Digital In").
*/
static const char *const capture_source_texts[] = { "Analog In", "Digital In" };
-/**
+/*
* tascam_playback_source_info() - ALSA control info callback for playback
* source.
* @kcontrol: The ALSA kcontrol instance.
@@ -38,7 +38,7 @@ static int tascam_playback_source_info(struct snd_kcontrol *kcontrol,
return snd_ctl_enum_info(uinfo, 1, 2, playback_source_texts);
}
-/**
+/*
* tascam_line_out_get() - ALSA control get callback for Line Outputs Source.
* @kcontrol: The ALSA kcontrol instance.
* @ucontrol: The ALSA control element value structure to fill.
@@ -60,7 +60,7 @@ static int tascam_line_out_get(struct snd_kcontrol *kcontrol,
return 0;
}
-/**
+/*
* tascam_line_out_put() - ALSA control put callback for Line Outputs Source.
* @kcontrol: The ALSA kcontrol instance.
* @ucontrol: The ALSA control element value structure containing the new value.
@@ -89,7 +89,7 @@ static int tascam_line_out_put(struct snd_kcontrol *kcontrol,
return changed;
}
-/**
+/*
* tascam_line_out_control - ALSA kcontrol definition for Line Outputs Source.
*
* This defines a new ALSA mixer control named "Line OUTPUTS Source" that allows
@@ -106,7 +106,7 @@ static const struct snd_kcontrol_new tascam_line_out_control = {
.put = tascam_line_out_put,
};
-/**
+/*
* tascam_digital_out_get() - ALSA control get callback for Digital Outputs
* Source.
* @kcontrol: The ALSA kcontrol instance.
@@ -129,7 +129,7 @@ static int tascam_digital_out_get(struct snd_kcontrol *kcontrol,
return 0;
}
-/**
+/*
* tascam_digital_out_put() - ALSA control put callback for Digital Outputs
* Source.
* @kcontrol: The ALSA kcontrol instance.
@@ -159,7 +159,7 @@ static int tascam_digital_out_put(struct snd_kcontrol *kcontrol,
return changed;
}
-/**
+/*
* tascam_digital_out_control - ALSA kcontrol definition for Digital Outputs
* Source.
*
@@ -177,7 +177,7 @@ static const struct snd_kcontrol_new tascam_digital_out_control = {
.put = tascam_digital_out_put,
};
-/**
+/*
* tascam_capture_source_info() - ALSA control info callback for capture source.
* @kcontrol: The ALSA kcontrol instance.
* @uinfo: The ALSA control element info structure to fill.
@@ -194,7 +194,7 @@ static int tascam_capture_source_info(struct snd_kcontrol *kcontrol,
return snd_ctl_enum_info(uinfo, 1, 2, capture_source_texts);
}
-/**
+/*
* tascam_capture_12_get() - ALSA control get callback for Capture channels 1
* and 2 Source.
* @kcontrol: The ALSA kcontrol instance.
@@ -217,7 +217,7 @@ static int tascam_capture_12_get(struct snd_kcontrol *kcontrol,
return 0;
}
-/**
+/*
* tascam_capture_12_put() - ALSA control put callback for Capture channels 1
* and 2 Source.
* @kcontrol: The ALSA kcontrol instance.
@@ -247,7 +247,7 @@ static int tascam_capture_12_put(struct snd_kcontrol *kcontrol,
return changed;
}
-/**
+/*
* tascam_capture_12_control - ALSA kcontrol definition for Capture channels 1
* and 2 Source.
*
@@ -265,7 +265,7 @@ static const struct snd_kcontrol_new tascam_capture_12_control = {
.put = tascam_capture_12_put,
};
-/**
+/*
* tascam_capture_34_get() - ALSA control get callback for Capture channels 3
* and 4 Source.
* @kcontrol: The ALSA kcontrol instance.
@@ -288,7 +288,7 @@ static int tascam_capture_34_get(struct snd_kcontrol *kcontrol,
return 0;
}
-/**
+/*
* tascam_capture_34_put() - ALSA control put callback for Capture channels 3
* and 4 Source.
* @kcontrol: The ALSA kcontrol instance.
@@ -318,7 +318,7 @@ static int tascam_capture_34_put(struct snd_kcontrol *kcontrol,
return changed;
}
-/**
+/*
* tascam_capture_34_control - ALSA kcontrol definition for Capture channels 3
* and 4 Source.
*
@@ -336,7 +336,7 @@ static const struct snd_kcontrol_new tascam_capture_34_control = {
.put = tascam_capture_34_put,
};
-/**
+/*
* tascam_samplerate_info() - ALSA control info callback for Sample Rate.
* @kcontrol: The ALSA kcontrol instance.
* @uinfo: The ALSA control element info structure to fill.
@@ -356,7 +356,7 @@ static int tascam_samplerate_info(struct snd_kcontrol *kcontrol,
return 0;
}
-/**
+/*
* tascam_samplerate_get() - ALSA control get callback for Sample Rate.
* @kcontrol: The ALSA kcontrol instance.
* @ucontrol: The ALSA control element value structure to fill.
@@ -400,7 +400,7 @@ static int tascam_samplerate_get(struct snd_kcontrol *kcontrol,
return 0;
}
-/**
+/*
* tascam_samplerate_control - ALSA kcontrol definition for Sample Rate.
*
* This defines a new ALSA mixer control named "Sample Rate" that displays
diff --git a/sound/usb/usx2y/us144mkii_midi.c b/sound/usb/usx2y/us144mkii_midi.c
index ed2afec2a89a..4871797b1670 100644
--- a/sound/usb/usx2y/us144mkii_midi.c
+++ b/sound/usb/usx2y/us144mkii_midi.c
@@ -3,7 +3,7 @@
#include "us144mkii.h"
-/**
+/*
* tascam_midi_in_work_handler() - Deferred work for processing MIDI input.
* @work: The work_struct instance.
*
@@ -75,7 +75,7 @@ out:
usb_put_urb(urb);
}
-/**
+/*
* tascam_midi_in_open() - Opens the MIDI input substream.
* @substream: The ALSA rawmidi substream to open.
*
@@ -92,7 +92,7 @@ static int tascam_midi_in_open(struct snd_rawmidi_substream *substream)
return 0;
}
-/**
+/*
* tascam_midi_in_close() - Closes the MIDI input substream.
* @substream: The ALSA rawmidi substream to close.
*
@@ -103,7 +103,7 @@ static int tascam_midi_in_close(struct snd_rawmidi_substream *substream)
return 0;
}
-/**
+/*
* tascam_midi_in_trigger() - Triggers MIDI input stream activity.
* @substream: The ALSA rawmidi substream.
* @up: Boolean indicating whether to start (1) or stop (0) the stream.
@@ -150,7 +150,7 @@ static void tascam_midi_in_trigger(struct snd_rawmidi_substream *substream,
}
}
-/**
+/*
* tascam_midi_in_ops - ALSA rawmidi operations for MIDI input.
*
* This structure defines the callback functions for MIDI input stream
@@ -205,7 +205,7 @@ out:
usb_put_urb(urb);
}
-/**
+/*
* tascam_midi_out_work_handler() - Deferred work for sending MIDI data
* @work: The work_struct instance.
*
@@ -282,7 +282,7 @@ static void tascam_midi_out_work_handler(struct work_struct *work)
}
}
-/**
+/*
* tascam_midi_out_open() - Opens the MIDI output substream.
* @substream: The ALSA rawmidi substream to open.
*
@@ -301,7 +301,7 @@ static int tascam_midi_out_open(struct snd_rawmidi_substream *substream)
return 0;
}
-/**
+/*
* tascam_midi_out_close() - Closes the MIDI output substream.
* @substream: The ALSA rawmidi substream to close.
*
@@ -312,7 +312,7 @@ static int tascam_midi_out_close(struct snd_rawmidi_substream *substream)
return 0;
}
-/**
+/*
* tascam_midi_out_drain() - Drains the MIDI output stream.
* @substream: The ALSA rawmidi substream.
*
@@ -340,7 +340,7 @@ static void tascam_midi_out_drain(struct snd_rawmidi_substream *substream)
usb_kill_anchored_urbs(&tascam->midi_out_anchor);
}
-/**
+/*
* tascam_midi_out_trigger() - Triggers MIDI output stream activity.
* @substream: The ALSA rawmidi substream.
* @up: Boolean indicating whether to start (1) or stop (0) the stream.
@@ -361,7 +361,7 @@ static void tascam_midi_out_trigger(struct snd_rawmidi_substream *substream,
}
}
-/**
+/*
* tascam_midi_out_ops - ALSA rawmidi operations for MIDI output.
*
* This structure defines the callback functions for MIDI output stream
diff --git a/sound/usb/usx2y/us144mkii_playback.c b/sound/usb/usx2y/us144mkii_playback.c
index 0cb9699ec211..7efaca0a6489 100644
--- a/sound/usb/usx2y/us144mkii_playback.c
+++ b/sound/usb/usx2y/us144mkii_playback.c
@@ -3,7 +3,7 @@
#include "us144mkii.h"
-/**
+/*
* tascam_playback_open() - Opens the PCM playback substream.
* @substream: The ALSA PCM substream to open.
*
@@ -23,7 +23,7 @@ static int tascam_playback_open(struct snd_pcm_substream *substream)
return 0;
}
-/**
+/*
* tascam_playback_close() - Closes the PCM playback substream.
* @substream: The ALSA PCM substream to close.
*
@@ -41,7 +41,7 @@ static int tascam_playback_close(struct snd_pcm_substream *substream)
return 0;
}
-/**
+/*
* tascam_playback_prepare() - Prepares the PCM playback substream for use.
* @substream: The ALSA PCM substream to prepare.
*
@@ -108,7 +108,7 @@ static int tascam_playback_prepare(struct snd_pcm_substream *substream)
return 0;
}
-/**
+/*
* tascam_playback_pointer() - Returns the current playback pointer position.
* @substream: The ALSA PCM substream.
*
@@ -137,7 +137,7 @@ tascam_playback_pointer(struct snd_pcm_substream *substream)
return do_div(pos, runtime->buffer_size);
}
-/**
+/*
* tascam_playback_ops - ALSA PCM operations for playback.
*
* This structure defines the callback functions for playback stream operations,
diff --git a/sound/usb/validate.c b/sound/usb/validate.c
index 4bb4893f6e74..f62b7cc041dc 100644
--- a/sound/usb/validate.c
+++ b/sound/usb/validate.c
@@ -281,7 +281,7 @@ static const struct usb_desc_validator audio_validators[] = {
/* UAC_VERSION_2, UAC2_SAMPLE_RATE_CONVERTER: not implemented yet */
/* UAC3 */
- FIXED(UAC_VERSION_2, UAC_HEADER, struct uac3_ac_header_descriptor),
+ FIXED(UAC_VERSION_3, UAC_HEADER, struct uac3_ac_header_descriptor),
FIXED(UAC_VERSION_3, UAC_INPUT_TERMINAL,
struct uac3_input_terminal_descriptor),
FIXED(UAC_VERSION_3, UAC_OUTPUT_TERMINAL,
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 1733a6e93a07..ef083602b73a 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -65,6 +65,9 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU
LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+ZLIB_LIBS := $(shell $(HOSTPKG_CONFIG) zlib --libs 2>/dev/null || echo -lz)
+ZSTD_LIBS := $(shell $(HOSTPKG_CONFIG) libzstd --libs 2>/dev/null || echo -lzstd)
+
HOSTCFLAGS_resolve_btfids += -g \
-I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
@@ -73,7 +76,7 @@ HOSTCFLAGS_resolve_btfids += -g \
$(LIBELF_FLAGS) \
-Wall -Werror
-LIBS = $(LIBELF_LIBS) -lz
+LIBS = $(LIBELF_LIBS) $(ZLIB_LIBS) $(ZSTD_LIBS)
export srctree OUTPUT HOSTCFLAGS_resolve_btfids Q HOSTCC HOSTLD HOSTAR
include $(srctree)/tools/build/Makefile.include
@@ -83,7 +86,7 @@ $(BINARY_IN): fixdep FORCE prepare | $(OUTPUT)
$(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN)
$(call msg,LINK,$@)
- $(Q)$(HOSTCC) $(BINARY_IN) $(KBUILD_HOSTLDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS)
+ $(Q)$(HOSTCC) $(BINARY_IN) $(KBUILD_HOSTLDFLAGS) $(EXTRA_LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS)
clean_objects := $(wildcard $(OUTPUT)/*.o \
$(OUTPUT)/.*.o.cmd \
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index ca7fcd03efb6..5208f650080f 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -226,7 +226,7 @@ static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
}
static struct btf_id *__btf_id__add(struct rb_root *root,
- char *name,
+ const char *name,
enum btf_id_kind kind,
bool unique)
{
@@ -250,7 +250,11 @@ static struct btf_id *__btf_id__add(struct rb_root *root,
id = zalloc(sizeof(*id));
if (id) {
pr_debug("adding symbol %s\n", name);
- id->name = name;
+ id->name = strdup(name);
+ if (!id->name) {
+ free(id);
+ return NULL;
+ }
id->kind = kind;
rb_link_node(&id->rb_node, parent, p);
rb_insert_color(&id->rb_node, root);
@@ -258,17 +262,21 @@ static struct btf_id *__btf_id__add(struct rb_root *root,
return id;
}
-static inline struct btf_id *btf_id__add(struct rb_root *root, char *name, enum btf_id_kind kind)
+static inline struct btf_id *btf_id__add(struct rb_root *root,
+ const char *name,
+ enum btf_id_kind kind)
{
return __btf_id__add(root, name, kind, false);
}
-static inline struct btf_id *btf_id__add_unique(struct rb_root *root, char *name, enum btf_id_kind kind)
+static inline struct btf_id *btf_id__add_unique(struct rb_root *root,
+ const char *name,
+ enum btf_id_kind kind)
{
return __btf_id__add(root, name, kind, true);
}
-static char *get_id(const char *prefix_end)
+static int get_id(const char *prefix_end, char *buf, size_t buf_sz)
{
/*
* __BTF_ID__func__vfs_truncate__0
@@ -277,28 +285,28 @@ static char *get_id(const char *prefix_end)
*/
int len = strlen(prefix_end);
int pos = sizeof("__") - 1;
- char *p, *id;
+ char *p;
if (pos >= len)
- return NULL;
+ return -1;
- id = strdup(prefix_end + pos);
- if (id) {
- /*
- * __BTF_ID__func__vfs_truncate__0
- * id = ^
- *
- * cut the unique id part
- */
- p = strrchr(id, '_');
- p--;
- if (*p != '_') {
- free(id);
- return NULL;
- }
- *p = '\0';
- }
- return id;
+ if (len - pos >= buf_sz)
+ return -1;
+
+ strcpy(buf, prefix_end + pos);
+ /*
+ * __BTF_ID__func__vfs_truncate__0
+ * buf = ^
+ *
+ * cut the unique id part
+ */
+ p = strrchr(buf, '_');
+ p--;
+ if (*p != '_')
+ return -1;
+ *p = '\0';
+
+ return 0;
}
static struct btf_id *add_set(struct object *obj, char *name, enum btf_id_kind kind)
@@ -335,10 +343,9 @@ static struct btf_id *add_set(struct object *obj, char *name, enum btf_id_kind k
static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
{
- char *id;
+ char id[KSYM_NAME_LEN];
- id = get_id(name + size);
- if (!id) {
+ if (get_id(name + size, id, sizeof(id))) {
pr_err("FAILED to parse symbol name: %s\n", name);
return NULL;
}
@@ -346,6 +353,21 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
return btf_id__add(root, id, BTF_ID_KIND_SYM);
}
+static void btf_id__free_all(struct rb_root *root)
+{
+ struct rb_node *next;
+ struct btf_id *id;
+
+ next = rb_first(root);
+ while (next) {
+ id = rb_entry(next, struct btf_id, rb_node);
+ next = rb_next(&id->rb_node);
+ rb_erase(&id->rb_node, root);
+ free(id->name);
+ free(id);
+ }
+}
+
static void bswap_32_data(void *data, u32 nr_bytes)
{
u32 cnt, i;
@@ -1547,6 +1569,11 @@ dump_btf:
out:
btf__free(obj.base_btf);
btf__free(obj.btf);
+ btf_id__free_all(&obj.structs);
+ btf_id__free_all(&obj.unions);
+ btf_id__free_all(&obj.typedefs);
+ btf_id__free_all(&obj.funcs);
+ btf_id__free_all(&obj.sets);
if (obj.efile.elf) {
elf_end(obj.efile.elf);
close(obj.efile.fd);
diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h
index 2e8e65d975c7..14b268f2389a 100644
--- a/tools/include/linux/args.h
+++ b/tools/include/linux/args.h
@@ -22,7 +22,11 @@
#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
/* Concatenate two parameters, but allow them to be expanded beforehand. */
+#ifndef __CONCAT
#define __CONCAT(a, b) a ## b
+#endif
+#ifndef CONCATENATE
#define CONCATENATE(a, b) __CONCAT(a, b)
+#endif
#endif /* _LINUX_ARGS_H */
diff --git a/tools/sched_ext/Kconfig b/tools/sched_ext/Kconfig
new file mode 100644
index 000000000000..275bd97ae62b
--- /dev/null
+++ b/tools/sched_ext/Kconfig
@@ -0,0 +1,61 @@
+# sched-ext mandatory options
+#
+CONFIG_BPF=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT_DEFAULT_ON=y
+CONFIG_SCHED_CLASS_EXT=y
+
+# Required by some rust schedulers (e.g. scx_p2dq)
+#
+CONFIG_KALLSYMS_ALL=y
+
+# Required on arm64
+#
+# CONFIG_DEBUG_INFO_REDUCED is not set
+
+# LAVD tracks futex to give an additional time slice for futex holder
+# (i.e., avoiding lock holder preemption) for better system-wide progress.
+# LAVD first tries to use ftrace to trace futex function calls.
+# If that is not available, it tries to use a tracepoint.
+CONFIG_FUNCTION_TRACER=y
+
+# Enable scheduling debugging
+#
+CONFIG_SCHED_DEBUG=y
+
+# Enable extra scheduling features (for a better code coverage while testing
+# the schedulers)
+#
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_SCHED_CORE=y
+CONFIG_SCHED_MC=y
+
+# Enable fully preemptible kernel for a better test coverage of the schedulers
+#
+# CONFIG_PREEMPT_NONE is not set
+# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_PREEMPT=y
+CONFIG_PREEMPT_DYNAMIC=y
+
+# Additional debugging information (useful to catch potential locking issues)
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_PROVE_LOCKING=y
+
+# Bpftrace headers (for additional debug info)
+CONFIG_BPF_EVENTS=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
+CONFIG_DEBUG_FS=y
+
+# Enable access to kernel configuration and headers at runtime
+CONFIG_IKHEADERS=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_IKCONFIG=y
diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile
index 47ad7444677e..21554f089692 100644
--- a/tools/sched_ext/Makefile
+++ b/tools/sched_ext/Makefile
@@ -122,6 +122,8 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \
-I../../include \
$(call get_sys_includes,$(CLANG)) \
-Wall -Wno-compare-distinct-pointer-types \
+ -Wno-microsoft-anon-tag \
+ -fms-extensions \
-O2 -mcpu=v3
# sort removes libbpf duplicates when not cross-building
diff --git a/tools/sched_ext/README.md b/tools/sched_ext/README.md
index 56a9d1557ac4..6e282bce453c 100644
--- a/tools/sched_ext/README.md
+++ b/tools/sched_ext/README.md
@@ -58,14 +58,8 @@ CONFIG_SCHED_CLASS_EXT=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_DEBUG_INFO_BTF=y
-```
-
-It's also recommended that you also include the following Kconfig options:
-
-```
CONFIG_BPF_JIT_ALWAYS_ON=y
CONFIG_BPF_JIT_DEFAULT_ON=y
-CONFIG_PAHOLE_HAS_BTF_TAG=y
```
There is a `Kconfig` file in this directory whose contents you can append to
diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h
index 8b4897fc8b99..edccc99c7294 100644
--- a/tools/sched_ext/include/scx/compat.h
+++ b/tools/sched_ext/include/scx/compat.h
@@ -125,6 +125,7 @@ static inline long scx_hotplug_seq(void)
{
int fd;
char buf[32];
+ char *endptr;
ssize_t len;
long val;
@@ -137,8 +138,10 @@ static inline long scx_hotplug_seq(void)
buf[len] = 0;
close(fd);
- val = strtoul(buf, NULL, 10);
- SCX_BUG_ON(val < 0, "invalid num hotplug events: %lu", val);
+ errno = 0;
+ val = strtoul(buf, &endptr, 10);
+ SCX_BUG_ON(errno == ERANGE || endptr == buf ||
+ (*endptr != '\n' && *endptr != '\0'), "invalid num hotplug events: %ld", val);
return val;
}
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index 2a805f1d6c8f..710fa03376e2 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -66,7 +66,7 @@ restart:
assert(skel->rodata->nr_cpu_ids > 0);
assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
- while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
+ while ((opt = getopt(argc, argv, "s:c:vh")) != -1) {
switch (opt) {
case 's':
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
diff --git a/tools/sched_ext/scx_sdt.c b/tools/sched_ext/scx_sdt.c
index d8ca9aa316a5..a36405d8df30 100644
--- a/tools/sched_ext/scx_sdt.c
+++ b/tools/sched_ext/scx_sdt.c
@@ -54,7 +54,7 @@ restart:
optind = 1;
skel = SCX_OPS_OPEN(sdt_ops, scx_sdt);
- while ((opt = getopt(argc, argv, "fvh")) != -1) {
+ while ((opt = getopt(argc, argv, "vh")) != -1) {
switch (opt) {
case 'v':
verbose = true;
diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c
new file mode 100644
index 000000000000..2259f454a202
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026 ARM Limited
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static uint64_t *gcs_page;
+static bool post_mprotect;
+
+#ifndef __NR_map_shadow_stack
+#define __NR_map_shadow_stack 453
+#endif
+
+static bool alloc_gcs(struct tdescr *td)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+
+ gcs_page = (void *)syscall(__NR_map_shadow_stack, 0,
+ page_size, 0);
+ if (gcs_page == MAP_FAILED) {
+ fprintf(stderr, "Failed to map %ld byte GCS: %d\n",
+ page_size, errno);
+ return false;
+ }
+
+ return true;
+}
+
+static int gcs_prot_none_fault_trigger(struct tdescr *td)
+{
+ /* Verify that the page is readable (ie, not completely unmapped) */
+ fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]);
+
+ if (mprotect(gcs_page, sysconf(_SC_PAGE_SIZE), PROT_NONE) != 0) {
+ fprintf(stderr, "mprotect(PROT_NONE) failed: %d\n", errno);
+ return 0;
+ }
+ post_mprotect = true;
+
+ /* This should trigger a fault if PROT_NONE is honoured for the GCS page */
+ fprintf(stderr, "Read value after mprotect(PROT_NONE) 0x%lx\n", gcs_page[0]);
+ return 0;
+}
+
+static int gcs_prot_none_fault_signal(struct tdescr *td, siginfo_t *si,
+ ucontext_t *uc)
+{
+ ASSERT_GOOD_CONTEXT(uc);
+
+ /* A fault before mprotect(PROT_NONE) is unexpected. */
+ if (!post_mprotect)
+ return 0;
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "GCS PROT_NONE fault",
+ .descr = "Read from GCS after mprotect(PROT_NONE) segfaults",
+ .feats_required = FEAT_GCS,
+ .timeout = 3,
+ .sig_ok = SIGSEGV,
+ .sanity_disabled = true,
+ .init = alloc_gcs,
+ .trigger = gcs_prot_none_fault_trigger,
+ .run = gcs_prot_none_fault_signal,
+};
diff --git a/tools/testing/selftests/bpf/DENYLIST.asan b/tools/testing/selftests/bpf/DENYLIST.asan
new file mode 100644
index 000000000000..d7fe372a2293
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST.asan
@@ -0,0 +1,3 @@
+*arena*
+task_local_data
+uprobe_multi_test
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6776158f1f3e..72a9ba41f95e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -27,7 +27,11 @@ ifneq ($(wildcard $(GENHDR)),)
endif
BPF_GCC ?= $(shell command -v bpf-gcc;)
+ifdef ASAN
+SAN_CFLAGS ?= -fsanitize=address -fno-omit-frame-pointer
+else
SAN_CFLAGS ?=
+endif
SAN_LDFLAGS ?= $(SAN_CFLAGS)
RELEASE ?=
OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
@@ -326,8 +330,8 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \
- EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \
- EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
+ EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \
@@ -338,8 +342,8 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(BPFOBJ) | $(BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \
- EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \
- EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
+ EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \
OUTPUT=$(BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \
LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \
@@ -404,6 +408,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \
CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \
LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \
+ EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \
OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
# Get Clang's default includes on this system, as opposed to those seen by
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index aeec9edd3851..f74b313d6ae4 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -230,8 +230,8 @@ static void trigger_fentry_setup(void)
static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
{
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
- char **syms = NULL;
- size_t cnt = 0;
+ struct bpf_link *link = NULL;
+ struct ksyms *ksyms = NULL;
/* Some recursive functions will be skipped in
* bpf_get_ksyms -> skip_entry, as they can introduce sufficient
@@ -241,16 +241,18 @@ static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
* So, don't run the kprobe-multi-all and kretprobe-multi-all on
* a debug kernel.
*/
- if (bpf_get_ksyms(&syms, &cnt, true)) {
+ if (bpf_get_ksyms(&ksyms, true)) {
fprintf(stderr, "failed to get ksyms\n");
exit(1);
}
- opts.syms = (const char **) syms;
- opts.cnt = cnt;
+ opts.syms = (const char **)ksyms->filtered_syms;
+ opts.cnt = ksyms->filtered_cnt;
opts.retprobe = kretprobe;
/* attach empty to all the kernel functions except bpf_get_numa_node_id. */
- if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
+ link = bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts);
+ free_kallsyms_local(ksyms);
+ if (!link) {
fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 4bc2d25f33e1..6cb56501a505 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -8,6 +8,7 @@
#include <errno.h>
#include <syscall.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
+#include <linux/args.h>
static inline unsigned int bpf_num_possible_cpus(void)
{
@@ -21,25 +22,43 @@ static inline unsigned int bpf_num_possible_cpus(void)
return possible_cpus;
}
-/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst
- * is zero-terminated string no matter what (unless sz == 0, in which case
- * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs
- * in what is returned. Given this is internal helper, it's trivial to extend
- * this, when necessary. Use this instead of strncpy inside libbpf source code.
+/*
+ * Simplified strscpy() implementation. The kernel one is in lib/string.c
*/
-static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
+static inline ssize_t sized_strscpy(char *dest, const char *src, size_t count)
{
- size_t i;
+ long res = 0;
- if (sz == 0)
- return;
+ if (count == 0)
+ return -E2BIG;
- sz--;
- for (i = 0; i < sz && src[i]; i++)
- dst[i] = src[i];
- dst[i] = '\0';
+ while (count > 1) {
+ char c;
+
+ c = src[res];
+ dest[res] = c;
+ if (!c)
+ return res;
+ res++;
+ count--;
+ }
+
+ /* Force NUL-termination. */
+ dest[res] = '\0';
+
+ /* Return E2BIG if the source didn't stop */
+ return src[res] ? -E2BIG : res;
}
+#define __strscpy0(dst, src, ...) \
+ sized_strscpy(dst, src, sizeof(dst))
+#define __strscpy1(dst, src, size) \
+ sized_strscpy(dst, src, size)
+
+#undef strscpy /* Redefine the placeholder from tools/include/linux/string.h */
+#define strscpy(dst, src, ...) \
+ CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)
+
#define __bpf_percpu_val_align __attribute__((__aligned__(8)))
#define BPF_DECLARE_PERCPU(type, name) \
diff --git a/tools/testing/selftests/bpf/bpftool_helpers.c b/tools/testing/selftests/bpf/bpftool_helpers.c
index a5824945a4a5..929fc257f431 100644
--- a/tools/testing/selftests/bpf/bpftool_helpers.c
+++ b/tools/testing/selftests/bpf/bpftool_helpers.c
@@ -1,24 +1,37 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include "bpftool_helpers.h"
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
+#include "bpf_util.h"
+#include "bpftool_helpers.h"
+
#define BPFTOOL_PATH_MAX_LEN 64
#define BPFTOOL_FULL_CMD_MAX_LEN 512
#define BPFTOOL_DEFAULT_PATH "tools/sbin/bpftool"
-static int detect_bpftool_path(char *buffer)
+static int detect_bpftool_path(char *buffer, size_t size)
{
char tmp[BPFTOOL_PATH_MAX_LEN];
+ const char *env_path;
+
+ /* First, check if BPFTOOL environment variable is set */
+ env_path = getenv("BPFTOOL");
+ if (env_path && access(env_path, X_OK) == 0) {
+ strscpy(buffer, env_path, size);
+ return 0;
+ } else if (env_path) {
+ fprintf(stderr, "bpftool '%s' doesn't exist or is not executable\n", env_path);
+ return 1;
+ }
/* Check default bpftool location (will work if we are running the
* default flavor of test_progs)
*/
snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "./%s", BPFTOOL_DEFAULT_PATH);
if (access(tmp, X_OK) == 0) {
- strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN);
+ strscpy(buffer, tmp, size);
return 0;
}
@@ -27,11 +40,11 @@ static int detect_bpftool_path(char *buffer)
*/
snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "../%s", BPFTOOL_DEFAULT_PATH);
if (access(tmp, X_OK) == 0) {
- strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN);
+ strscpy(buffer, tmp, size);
return 0;
}
- /* Failed to find bpftool binary */
+ fprintf(stderr, "Failed to detect bpftool path, use BPFTOOL env var to override\n");
return 1;
}
@@ -44,7 +57,7 @@ static int run_command(char *args, char *output_buf, size_t output_max_len)
int ret;
/* Detect and cache bpftool binary location */
- if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path))
+ if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path, sizeof(bpftool_path)))
return 1;
ret = snprintf(command, BPFTOOL_FULL_CMD_MAX_LEN, "%s %s%s",
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 20cede4db3ce..45cd0b479fe3 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -86,7 +86,7 @@ static int __enable_controllers(const char *cgroup_path, const char *controllers
enable[len] = 0;
close(fd);
} else {
- bpf_strlcpy(enable, controllers, sizeof(enable));
+ strscpy(enable, controllers);
}
snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.c b/tools/testing/selftests/bpf/jit_disasm_helpers.c
index febd6b12e372..364c557c5115 100644
--- a/tools/testing/selftests/bpf/jit_disasm_helpers.c
+++ b/tools/testing/selftests/bpf/jit_disasm_helpers.c
@@ -122,15 +122,15 @@ static int disasm_one_func(FILE *text_out, uint8_t *image, __u32 len)
pc += cnt;
}
qsort(labels.pcs, labels.cnt, sizeof(*labels.pcs), cmp_u32);
- for (i = 0; i < labels.cnt; ++i)
- /* gcc is unable to infer upper bound for labels.cnt and assumes
- * it to be U32_MAX. U32_MAX takes 10 decimal digits.
- * snprintf below prints into labels.names[*],
- * which has space only for two digits and a letter.
- * To avoid truncation warning use (i % MAX_LOCAL_LABELS),
- * which informs gcc about printed value upper bound.
- */
- snprintf(labels.names[i], sizeof(labels.names[i]), "L%d", i % MAX_LOCAL_LABELS);
+ /* gcc is unable to infer upper bound for labels.cnt and
+ * assumes it to be U32_MAX. U32_MAX takes 10 decimal digits.
+ * snprintf below prints into labels.names[*], which has space
+ * only for two digits and a letter. To avoid truncation
+ * warning use (i < MAX_LOCAL_LABELS), which informs gcc about
+ * printed value upper bound.
+ */
+ for (i = 0; i < labels.cnt && i < MAX_LOCAL_LABELS; ++i)
+ snprintf(labels.names[i], sizeof(labels.names[i]), "L%d", i);
/* now print with labels */
labels.print_phase = true;
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 0a6a5561bed3..b82f572641b7 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -432,7 +432,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
memset(addr, 0, sizeof(*sun));
sun->sun_family = family;
sun->sun_path[0] = 0;
- strcpy(sun->sun_path + 1, addr_str);
+ strscpy(sun->sun_path + 1, addr_str, sizeof(sun->sun_path) - 1);
if (len)
*len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str);
return 0;
@@ -581,8 +581,7 @@ int open_tuntap(const char *dev_name, bool need_mac)
return -1;
ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
- strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
- ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+ strscpy(ifr.ifr_name, dev_name);
err = ioctl(fd, TUNSETIFF, &ifr);
if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 5225d69bf79b..c69080ca14f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -346,8 +346,7 @@ static void test_task_sleepable(void)
close(finish_pipe[1]);
test_data = malloc(sizeof(char) * 10);
- strncpy(test_data, "test_data", 10);
- test_data[9] = '\0';
+ strscpy(test_data, "test_data", 10);
test_data_long = malloc(sizeof(char) * 5000);
for (int i = 0; i < 5000; ++i) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index b7d1b52309d0..f829b6f09bc9 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -281,7 +281,7 @@ static void test_dctcp_fallback(void)
dctcp_skel = bpf_dctcp__open();
if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
return;
- strcpy(dctcp_skel->rodata->fallback_cc, "cubic");
+ strscpy(dctcp_skel->rodata->fallback_cc, "cubic");
if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load"))
goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
index 9015e2c2ab12..478a77cb67e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -202,7 +202,7 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
iter_fd = bpf_iter_create(bpf_link__fd(link));
if (!ASSERT_GE(iter_fd, 0, "iter_create"))
- goto out;
+ goto out_link;
/* trigger the program run */
(void)read(iter_fd, buf, sizeof(buf));
@@ -210,6 +210,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
close(iter_fd);
+out_link:
+ bpf_link__destroy(link);
out:
cgrp_ls_sleepable__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
index dd75ccb03770..469e92869523 100644
--- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -308,8 +308,10 @@ static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches
return -1;
}
- strncpy(type_str, type, type_sz);
- strncpy(field_str, field, field_sz);
+ memcpy(type_str, type, type_sz);
+ type_str[type_sz] = '\0';
+ memcpy(field_str, field, field_sz);
+ field_str[field_sz] = '\0';
btf_id = btf__find_by_name(btf, type_str);
if (btf_id < 0) {
PRINT_FAIL("No BTF info for type %s\n", type_str);
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index b9f86cb91e81..5fda11590708 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -137,11 +137,14 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
);
link = bpf_program__attach(prog);
- if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach")) {
+ bpf_object__close(obj);
goto cleanup;
+ }
err = bpf_prog_test_run_opts(aux_prog_fd, &topts);
bpf_link__destroy(link);
+ bpf_object__close(obj);
if (!ASSERT_OK(err, "test_run"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index c534b4d5f9da..3078d8264deb 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -412,8 +412,8 @@ static void check_fd_array_cnt__fd_array_too_big(void)
ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG");
cleanup_fds:
- while (i > 0)
- Close(extra_fds[--i]);
+ while (i-- > 0)
+ Close(extra_fds[i]);
}
void test_fd_array_cnt(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 08bae13248c4..fb4892681464 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -570,7 +570,7 @@ static int create_tap(const char *ifname)
};
int fd, ret;
- strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+ strscpy(ifr.ifr_name, ifname);
fd = open("/dev/net/tun", O_RDWR);
if (fd < 0)
@@ -599,7 +599,7 @@ static int ifup(const char *ifname)
struct ifreq ifr = {};
int sk, ret;
- strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+ strscpy(ifr.ifr_name, ifname);
sk = socket(PF_INET, SOCK_DGRAM, 0);
if (sk < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
index d0b405eb2966..ea1a6766fbe9 100644
--- a/tools/testing/selftests/bpf/prog_tests/htab_update.c
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -61,6 +61,7 @@ static void test_reenter_update(void)
ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy");
out:
+ free(value);
htab_update__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
index 6e35e13c2022..399fe9103f83 100644
--- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -104,11 +104,8 @@ void test_kmem_cache_iter(void)
if (!ASSERT_GE(iter_fd, 0, "iter_create"))
goto destroy;
- memset(buf, 0, sizeof(buf));
- while (read(iter_fd, buf, sizeof(buf)) > 0) {
- /* Read out all contents */
- printf("%s", buf);
- }
+ while (read(iter_fd, buf, sizeof(buf)) > 0)
+ ; /* Read out all contents */
/* Next reads should return 0 */
ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 9caef222e528..f81dcd609ee9 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -456,25 +456,23 @@ static void test_kprobe_multi_bench_attach(bool kernel)
{
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
struct kprobe_multi_empty *skel = NULL;
- char **syms = NULL;
- size_t cnt = 0;
+ struct ksyms *ksyms = NULL;
- if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms"))
+ if (!ASSERT_OK(bpf_get_ksyms(&ksyms, kernel), "bpf_get_ksyms"))
return;
skel = kprobe_multi_empty__open_and_load();
if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
goto cleanup;
- opts.syms = (const char **) syms;
- opts.cnt = cnt;
+ opts.syms = (const char **)ksyms->filtered_syms;
+ opts.cnt = ksyms->filtered_cnt;
do_bench_test(skel, &opts);
cleanup:
kprobe_multi_empty__destroy(skel);
- if (syms)
- free(syms);
+ free_kallsyms_local(ksyms);
}
static void test_kprobe_multi_bench_attach_addr(bool kernel)
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
index 3bc730b7c7fa..1b25d5c5f8fb 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
@@ -117,7 +117,7 @@ void test_lwt_seg6local(void)
const char *ns1 = NETNS_BASE "1";
const char *ns6 = NETNS_BASE "6";
struct nstoken *nstoken = NULL;
- const char *foobar = "foobar";
+ const char foobar[] = "foobar";
ssize_t bytes;
int sfd, cfd;
char buf[7];
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c
new file mode 100644
index 000000000000..506ed55e8528
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_kptr_race.skel.h"
+
+static int get_map_id(int map_fd)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+
+ if (!ASSERT_OK(bpf_map_get_info_by_fd(map_fd, &info, &len), "get_map_info"))
+ return -1;
+ return info.id;
+}
+
+static int read_refs(struct map_kptr_race *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ int ret;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.count_ref), &opts);
+ if (!ASSERT_OK(ret, "count_ref run"))
+ return -1;
+ if (!ASSERT_OK(opts.retval, "count_ref retval"))
+ return -1;
+ return skel->bss->num_of_refs;
+}
+
+static void test_htab_leak(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct map_kptr_race *skel, *watcher;
+ int ret, map_id;
+
+ skel = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_htab_leak), &opts);
+ if (!ASSERT_OK(ret, "test_htab_leak run"))
+ goto out_skel;
+ if (!ASSERT_OK(opts.retval, "test_htab_leak retval"))
+ goto out_skel;
+
+ map_id = get_map_id(bpf_map__fd(skel->maps.race_hash_map));
+ if (!ASSERT_GE(map_id, 0, "map_id"))
+ goto out_skel;
+
+ watcher = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(watcher, "watcher open_and_load"))
+ goto out_skel;
+
+ watcher->bss->target_map_id = map_id;
+ watcher->links.map_put = bpf_program__attach(watcher->progs.map_put);
+ if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry"))
+ goto out_watcher;
+ watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free);
+ if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit"))
+ goto out_watcher;
+
+ map_kptr_race__destroy(skel);
+ skel = NULL;
+
+ kern_sync_rcu();
+
+ while (!READ_ONCE(watcher->bss->map_freed))
+ sched_yield();
+
+ ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed");
+ ASSERT_EQ(read_refs(watcher), 2, "htab refcount");
+
+out_watcher:
+ map_kptr_race__destroy(watcher);
+out_skel:
+ map_kptr_race__destroy(skel);
+}
+
+static void test_percpu_htab_leak(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct map_kptr_race *skel, *watcher;
+ int ret, map_id;
+
+ skel = map_kptr_race__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+ if (skel->rodata->nr_cpus > 16)
+ skel->rodata->nr_cpus = 16;
+
+ ret = map_kptr_race__load(skel);
+ if (!ASSERT_OK(ret, "load"))
+ goto out_skel;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_percpu_htab_leak), &opts);
+ if (!ASSERT_OK(ret, "test_percpu_htab_leak run"))
+ goto out_skel;
+ if (!ASSERT_OK(opts.retval, "test_percpu_htab_leak retval"))
+ goto out_skel;
+
+ map_id = get_map_id(bpf_map__fd(skel->maps.race_percpu_hash_map));
+ if (!ASSERT_GE(map_id, 0, "map_id"))
+ goto out_skel;
+
+ watcher = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(watcher, "watcher open_and_load"))
+ goto out_skel;
+
+ watcher->bss->target_map_id = map_id;
+ watcher->links.map_put = bpf_program__attach(watcher->progs.map_put);
+ if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry"))
+ goto out_watcher;
+ watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free);
+ if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit"))
+ goto out_watcher;
+
+ map_kptr_race__destroy(skel);
+ skel = NULL;
+
+ kern_sync_rcu();
+
+ while (!READ_ONCE(watcher->bss->map_freed))
+ sched_yield();
+
+ ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed");
+ ASSERT_EQ(read_refs(watcher), 2, "percpu_htab refcount");
+
+out_watcher:
+ map_kptr_race__destroy(watcher);
+out_skel:
+ map_kptr_race__destroy(skel);
+}
+
+static void test_sk_ls_leak(void)
+{
+ struct map_kptr_race *skel, *watcher;
+ int listen_fd = -1, client_fd = -1, map_id;
+
+ skel = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (!ASSERT_OK(map_kptr_race__attach(skel), "attach"))
+ goto out_skel;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(listen_fd, 0, "start_server"))
+ goto out_skel;
+
+ client_fd = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto out_skel;
+
+ if (!ASSERT_EQ(skel->bss->sk_ls_leak_done, 1, "sk_ls_leak_done"))
+ goto out_skel;
+
+ close(client_fd);
+ client_fd = -1;
+ close(listen_fd);
+ listen_fd = -1;
+
+ map_id = get_map_id(bpf_map__fd(skel->maps.race_sk_ls_map));
+ if (!ASSERT_GE(map_id, 0, "map_id"))
+ goto out_skel;
+
+ watcher = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(watcher, "watcher open_and_load"))
+ goto out_skel;
+
+ watcher->bss->target_map_id = map_id;
+ watcher->links.map_put = bpf_program__attach(watcher->progs.map_put);
+ if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry"))
+ goto out_watcher;
+ watcher->links.sk_map_free = bpf_program__attach(watcher->progs.sk_map_free);
+ if (!ASSERT_OK_PTR(watcher->links.sk_map_free, "attach fexit"))
+ goto out_watcher;
+
+ map_kptr_race__destroy(skel);
+ skel = NULL;
+
+ kern_sync_rcu();
+
+ while (!READ_ONCE(watcher->bss->map_freed))
+ sched_yield();
+
+ ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed");
+ ASSERT_EQ(read_refs(watcher), 2, "sk_ls refcount");
+
+out_watcher:
+ map_kptr_race__destroy(watcher);
+out_skel:
+ if (client_fd >= 0)
+ close(client_fd);
+ if (listen_fd >= 0)
+ close(listen_fd);
+ map_kptr_race__destroy(skel);
+}
+
+void serial_test_map_kptr_race(void)
+{
+ if (test__start_subtest("htab_leak"))
+ test_htab_leak();
+ if (test__start_subtest("percpu_htab_leak"))
+ test_percpu_htab_leak();
+ if (test__start_subtest("sk_ls_leak"))
+ test_sk_ls_leak();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index a043af9cd6d9..41441325e179 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -28,9 +28,9 @@ static void test_queue_stack_map_by_type(int type)
vals[i] = rand();
if (type == QUEUE)
- strncpy(file, "./test_queue_map.bpf.o", sizeof(file));
+ strscpy(file, "./test_queue_map.bpf.o");
else if (type == STACK)
- strncpy(file, "./test_stack_map.bpf.o", sizeof(file));
+ strscpy(file, "./test_stack_map.bpf.o");
else
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index d93a0c7b1786..0322f817d07b 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -2091,7 +2091,7 @@ static struct subtest_case crafted_cases[] = {
{U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}},
{U64, U32, {0, 0x100000000}, {0, 0}},
- {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}},
+ {U64, U32, {0xfffffffe, 0x300000000}, {0x80000000, 0x80000000}},
{U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}},
/* these are tricky cases where lower 32 bits allow to tighten 64
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index e4dac529d424..77fe1bfb7504 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -212,7 +212,7 @@ void test_setget_sockopt(void)
if (!ASSERT_OK_PTR(skel, "open skel"))
goto done;
- strcpy(skel->rodata->veth, "binddevtest1");
+ strscpy(skel->rodata->veth, "binddevtest1");
skel->rodata->veth_ifindex = if_nametoindex("binddevtest1");
if (!ASSERT_GT(skel->rodata->veth_ifindex, 0, "if_nametoindex"))
goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
index 3eefdfed1db9..657d897958b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
+++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
@@ -34,7 +34,7 @@ void test_skc_to_unix_sock(void)
memset(&sockaddr, 0, sizeof(sockaddr));
sockaddr.sun_family = AF_UNIX;
- strncpy(sockaddr.sun_path, sock_path, strlen(sock_path));
+ strscpy(sockaddr.sun_path, sock_path);
sockaddr.sun_path[0] = '\0';
err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr));
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 256707e7d20d..dd3c757859f6 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -204,7 +204,7 @@ static void test_skmsg_helpers_with_link(enum bpf_map_type map_type)
/* Fail since bpf_link for the same prog type has been created. */
link2 = bpf_program__attach_sockmap(prog_clone, map);
if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) {
- bpf_link__detach(link2);
+ bpf_link__destroy(link2);
goto out;
}
@@ -230,7 +230,7 @@ static void test_skmsg_helpers_with_link(enum bpf_map_type map_type)
if (!ASSERT_OK(err, "bpf_link_update"))
goto out;
out:
- bpf_link__detach(link);
+ bpf_link__destroy(link);
test_skmsg_load_helpers__destroy(skel);
}
@@ -417,7 +417,7 @@ static void test_sockmap_skb_verdict_attach_with_link(void)
if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
goto out;
- bpf_link__detach(link);
+ bpf_link__destroy(link);
err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
if (!ASSERT_OK(err, "bpf_prog_attach"))
@@ -426,7 +426,7 @@ static void test_sockmap_skb_verdict_attach_with_link(void)
/* Fail since attaching with the same prog/map has been done. */
link = bpf_program__attach_sockmap(prog, map);
if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap"))
- bpf_link__detach(link);
+ bpf_link__destroy(link);
err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT);
if (!ASSERT_OK(err, "bpf_prog_detach2"))
@@ -747,13 +747,13 @@ static void test_sockmap_skb_verdict_peek_with_link(void)
test_sockmap_skb_verdict_peek_helper(map);
ASSERT_EQ(pass->bss->clone_called, 1, "clone_called");
out:
- bpf_link__detach(link);
+ bpf_link__destroy(link);
test_sockmap_pass_prog__destroy(pass);
}
static void test_sockmap_unconnected_unix(void)
{
- int err, map, stream = 0, dgram = 0, zero = 0;
+ int err, map, stream = -1, dgram = -1, zero = 0;
struct test_sockmap_pass_prog *skel;
skel = test_sockmap_pass_prog__open_and_load();
@@ -764,22 +764,22 @@ static void test_sockmap_unconnected_unix(void)
stream = xsocket(AF_UNIX, SOCK_STREAM, 0);
if (stream < 0)
- return;
+ goto out;
dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
- if (dgram < 0) {
- close(stream);
- return;
- }
+ if (dgram < 0)
+ goto out;
err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY);
- ASSERT_ERR(err, "bpf_map_update_elem(stream)");
+ if (!ASSERT_ERR(err, "bpf_map_update_elem(stream)"))
+ goto out;
err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY);
ASSERT_OK(err, "bpf_map_update_elem(dgram)");
-
+out:
close(stream);
close(dgram);
+ test_sockmap_pass_prog__destroy(skel);
}
static void test_sockmap_many_socket(void)
@@ -1027,7 +1027,7 @@ static void test_sockmap_skb_verdict_vsock_poll(void)
if (xrecv_nonblock(conn, &buf, 1, 0) != 1)
FAIL("xrecv_nonblock");
detach:
- bpf_link__detach(link);
+ bpf_link__destroy(link);
close:
xclose(conn);
xclose(peer);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index f1bdccc7e4e7..cc0c68bab907 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -899,7 +899,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk
redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
- bpf_link__detach(link);
+ bpf_link__destroy(link);
}
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index ba6b3ec1156a..53637431ec5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -142,7 +142,7 @@ static int getsetsockopt(void)
/* TCP_CONGESTION can extend the string */
- strcpy(buf.cc, "nv");
+ strscpy(buf.cc, "nv");
err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv"));
if (err) {
log_err("Failed to call setsockopt(TCP_CONGESTION)");
diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c
index 4006879ca3fe..d42123a0fb16 100644
--- a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c
+++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c
@@ -54,9 +54,7 @@ static void test_private_stack_fail(void)
}
err = struct_ops_private_stack_fail__load(skel);
- if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load"))
- goto cleanup;
- return;
+ ASSERT_ERR(err, "struct_ops_private_stack_fail__load");
cleanup:
struct_ops_private_stack_fail__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
index 0f86b9275cf9..8342e2fe5260 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_data.h
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -262,7 +262,7 @@ retry:
if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1))
goto retry;
- strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN);
+ strscpy(tld_meta_p->metadata[i].name, name);
atomic_store(&tld_meta_p->metadata[i].size, size);
return (tld_key_t){(__s16)off};
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index dd7a138d8c3d..2955750ddada 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -1360,10 +1360,8 @@ static void test_tc_opts_dev_cleanup_target(int target)
assert_mprog_count_ifindex(ifindex, target, 4);
- ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
- ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
- ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
- return;
+ goto cleanup;
+
cleanup3:
err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
ASSERT_OK(err, "prog_detach");
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 76d72a59365e..64fbda082309 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -1095,7 +1095,7 @@ static int tun_open(char *name)
ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
if (*name)
- strncpy(ifr.ifr_name, name, IFNAMSIZ);
+ strscpy(ifr.ifr_name, name);
err = ioctl(fd, TUNSETIFF, &ifr);
if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
index 273dd41ca09e..2a1ff821bc97 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
@@ -27,6 +27,7 @@ struct sysctl_test {
OP_EPERM,
SUCCESS,
} result;
+ struct bpf_object *obj;
};
static struct sysctl_test tests[] = {
@@ -1471,6 +1472,7 @@ static int load_sysctl_prog_file(struct sysctl_test *test)
return -1;
}
+ test->obj = obj;
return prog_fd;
}
@@ -1573,6 +1575,7 @@ out:
/* Detaching w/o checking return code: best effort attempt. */
if (progfd != -1)
bpf_prog_detach(cgfd, atype);
+ bpf_object__close(test->obj);
close(progfd);
printf("[%s]\n", err ? "FAIL" : "PASS");
return err;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
index 0fe0a8f62486..7fc4d7dd70ef 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -699,7 +699,7 @@ void test_tc_tunnel(void)
return;
if (!ASSERT_OK(setup(), "global setup"))
- return;
+ goto out;
for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) {
cfg = &subtests_cfg[i];
@@ -711,4 +711,7 @@ void test_tc_tunnel(void)
subtest_cleanup(cfg);
}
cleanup();
+
+out:
+ test_tc_tunnel__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
index b38c16b4247f..9aff08ac55c0 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -24,9 +24,9 @@ static struct fixture *init_fixture(void)
/* for no_alu32 and cpuv4 veristat is in parent folder */
if (access("./veristat", F_OK) == 0)
- strcpy(fix->veristat, "./veristat");
+ strscpy(fix->veristat, "./veristat");
else if (access("../veristat", F_OK) == 0)
- strcpy(fix->veristat, "../veristat");
+ strscpy(fix->veristat, "../veristat");
else
PRINT_FAIL("Can't find veristat binary");
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
index bab4a31621c7..7e38ec6e656b 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -2003,9 +2003,17 @@ int testapp_stats_tx_invalid_descs(struct test_spec *test)
int testapp_stats_rx_full(struct test_spec *test)
{
- if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+ struct pkt_stream *tmp;
+
+ tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
+ if (!tmp)
+ return TEST_FAILURE;
+ test->ifobj_tx->xsk->pkt_stream = tmp;
+
+ tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+ if (!tmp)
return TEST_FAILURE;
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+ test->ifobj_rx->xsk->pkt_stream = tmp;
test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
test->ifobj_rx->release_rx = false;
@@ -2015,9 +2023,17 @@ int testapp_stats_rx_full(struct test_spec *test)
int testapp_stats_fill_empty(struct test_spec *test)
{
- if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+ struct pkt_stream *tmp;
+
+ tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
+ if (!tmp)
+ return TEST_FAILURE;
+ test->ifobj_tx->xsk->pkt_stream = tmp;
+
+ tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+ if (!tmp)
return TEST_FAILURE;
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+ test->ifobj_rx->xsk->pkt_stream = tmp;
test->ifobj_rx->use_fill_ring = false;
test->ifobj_rx->validation_func = validate_fill_empty;
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
index 2ee17ef1dae2..56cbea280fbd 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -62,8 +62,10 @@ static void release_child(struct child *child)
return;
close(child->go[1]);
close(child->go[0]);
- if (child->thread)
+ if (child->thread) {
pthread_join(child->thread, NULL);
+ child->thread = 0;
+ }
close(child->c2p[0]);
close(child->c2p[1]);
if (child->pid > 0)
@@ -331,6 +333,8 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi
{
static struct child child;
+ memset(&child, 0, sizeof(child));
+
/* no pid filter */
__test_attach_api(binary, pattern, opts, NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
index 8337c6bc5b95..aaa2854974c0 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
@@ -47,7 +47,7 @@ static int load_prog(struct bpf_prog_load_opts *opts, bool expect_load_error)
static void verif_log_subtest(const char *name, bool expect_load_error, int log_level)
{
LIBBPF_OPTS(bpf_prog_load_opts, opts);
- char *exp_log, prog_name[16], op_name[32];
+ char *exp_log, prog_name[24], op_name[32];
struct test_log_buf *skel;
struct bpf_program *prog;
size_t fixed_log_sz;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index fb952703653e..e8ea26464349 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -610,6 +610,61 @@ out:
system("ip link del bond");
}
+/*
+ * Test that changing xmit_hash_policy to vlan+srcmac is rejected when a
+ * native XDP program is loaded on a bond in 802.3ad or balance-xor mode.
+ * These modes support XDP only when xmit_hash_policy != vlan+srcmac; freely
+ * changing the policy creates an inconsistency that triggers a WARNING in
+ * dev_xdp_uninstall() during device teardown.
+ */
+static void test_xdp_bonding_xmit_policy_compat(struct skeletons *skeletons)
+{
+ struct nstoken *nstoken = NULL;
+ int bond_ifindex = -1;
+ int xdp_fd, err;
+
+ SYS(out, "ip netns add ns_xmit_policy");
+ nstoken = open_netns("ns_xmit_policy");
+ if (!ASSERT_OK_PTR(nstoken, "open ns_xmit_policy"))
+ goto out;
+
+ /* 802.3ad with layer2+3 policy: native XDP is supported */
+ SYS(out, "ip link add bond0 type bond mode 802.3ad xmit_hash_policy layer2+3");
+ SYS(out, "ip link add veth0 type veth peer name veth0p");
+ SYS(out, "ip link set veth0 master bond0");
+ SYS(out, "ip link set bond0 up");
+
+ bond_ifindex = if_nametoindex("bond0");
+ if (!ASSERT_GT(bond_ifindex, 0, "bond0 ifindex"))
+ goto out;
+
+ xdp_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog);
+ if (!ASSERT_GE(xdp_fd, 0, "xdp_dummy fd"))
+ goto out;
+
+ err = bpf_xdp_attach(bond_ifindex, xdp_fd, XDP_FLAGS_DRV_MODE, NULL);
+ if (!ASSERT_OK(err, "attach XDP to bond0"))
+ goto out;
+
+ /* With XDP loaded, switching to vlan+srcmac must be rejected */
+ err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null");
+ ASSERT_NEQ(err, 0, "vlan+srcmac change with XDP loaded should fail");
+
+ /* Detach XDP first, then the same change must succeed */
+ ASSERT_OK(bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL),
+ "detach XDP from bond0");
+
+ bond_ifindex = -1;
+ err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null");
+ ASSERT_OK(err, "vlan+srcmac change without XDP should succeed");
+
+out:
+ if (bond_ifindex > 0)
+ bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL);
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del ns_xmit_policy");
+}
+
static int libbpf_debug_print(enum libbpf_print_level level,
const char *format, va_list args)
{
@@ -677,6 +732,9 @@ void serial_test_xdp_bonding(void)
test_case->xmit_policy);
}
+ if (test__start_subtest("xdp_bonding_xmit_policy_compat"))
+ test_xdp_bonding_xmit_policy_compat(&skeletons);
+
if (test__start_subtest("xdp_bonding_redirect_multi"))
test_xdp_bonding_redirect_multi(&skeletons);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
index 3f9146d83d79..325e0b64dc35 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
@@ -67,7 +67,7 @@ void test_xdp_flowtable(void)
struct nstoken *tok = NULL;
int iifindex, stats_fd;
__u32 value, key = 0;
- struct bpf_link *link;
+ struct bpf_link *link = NULL;
if (SYS_NOFAIL("nft -v")) {
fprintf(stdout, "Missing required nft tool\n");
@@ -160,6 +160,7 @@ void test_xdp_flowtable(void)
ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed");
out:
+ bpf_link__destroy(link);
xdp_flowtable__destroy(skel);
if (tok)
close_netns(tok);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 19f92affc2da..5c31054ad4a4 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -126,10 +126,10 @@ static int open_xsk(int ifindex, struct xsk *xsk)
static void close_xsk(struct xsk *xsk)
{
- if (xsk->umem)
- xsk_umem__delete(xsk->umem);
if (xsk->socket)
xsk_socket__delete(xsk->socket);
+ if (xsk->umem)
+ xsk_umem__delete(xsk->umem);
munmap(xsk->umem_area, UMEM_SIZE);
}
diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
index 13cdb11fdeb2..9cbb7442646e 100644
--- a/tools/testing/selftests/bpf/progs/dmabuf_iter.c
+++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
@@ -48,7 +48,7 @@ int dmabuf_collector(struct bpf_iter__dmabuf *ctx)
/* Buffers are not required to be named */
if (pname) {
- if (bpf_probe_read_kernel(name, sizeof(name), pname))
+ if (bpf_probe_read_kernel_str(name, sizeof(name), pname) < 0)
return 1;
/* Name strings can be provided by userspace */
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_race.c b/tools/testing/selftests/bpf/progs/map_kptr_race.c
new file mode 100644
index 000000000000..f6f136cd8f60
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr_race.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+struct map_value {
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} race_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} race_percpu_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} race_sk_ls_map SEC(".maps");
+
+int num_of_refs;
+int sk_ls_leak_done;
+int target_map_id;
+int map_freed;
+const volatile int nr_cpus;
+
+SEC("tc")
+int test_htab_leak(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *p, *old;
+ struct map_value val = {};
+ struct map_value *v;
+ int key = 0;
+
+ if (bpf_map_update_elem(&race_hash_map, &key, &val, BPF_ANY))
+ return 1;
+
+ v = bpf_map_lookup_elem(&race_hash_map, &key);
+ if (!v)
+ return 2;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 3;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+
+ bpf_map_delete_elem(&race_hash_map, &key);
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 4;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+
+ return 0;
+}
+
+static int fill_percpu_kptr(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p, *old;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 1;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+ return 0;
+}
+
+SEC("tc")
+int test_percpu_htab_leak(struct __sk_buff *skb)
+{
+ struct map_value *v, *arr[16] = {};
+ struct map_value val = {};
+ int key = 0;
+ int err = 0;
+
+ if (bpf_map_update_elem(&race_percpu_hash_map, &key, &val, BPF_ANY))
+ return 1;
+
+ for (int i = 0; i < nr_cpus; i++) {
+ v = bpf_map_lookup_percpu_elem(&race_percpu_hash_map, &key, i);
+ if (!v)
+ return 2;
+ arr[i] = v;
+ }
+
+ bpf_map_delete_elem(&race_percpu_hash_map, &key);
+
+ for (int i = 0; i < nr_cpus; i++) {
+ v = arr[i];
+ err = fill_percpu_kptr(v);
+ if (err)
+ return 3;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/inet_sock_set_state")
+int BPF_PROG(test_sk_ls_leak, struct sock *sk, int oldstate, int newstate)
+{
+ struct prog_test_ref_kfunc *p, *old;
+ struct map_value *v;
+
+ if (newstate != BPF_TCP_SYN_SENT)
+ return 0;
+
+ if (sk_ls_leak_done)
+ return 0;
+
+ v = bpf_sk_storage_get(&race_sk_ls_map, sk, NULL,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 0;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 0;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+
+ bpf_sk_storage_delete(&race_sk_ls_map, sk);
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 0;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+
+ sk_ls_leak_done = 1;
+ return 0;
+}
+
+long target_map_ptr;
+
+SEC("fentry/bpf_map_put")
+int BPF_PROG(map_put, struct bpf_map *map)
+{
+ if (target_map_id && map->id == (u32)target_map_id)
+ target_map_ptr = (long)map;
+ return 0;
+}
+
+SEC("fexit/htab_map_free")
+int BPF_PROG(htab_map_free, struct bpf_map *map)
+{
+ if (target_map_ptr && (long)map == target_map_ptr)
+ map_freed = 1;
+ return 0;
+}
+
+SEC("fexit/bpf_sk_storage_map_free")
+int BPF_PROG(sk_map_free, struct bpf_map *map)
+{
+ if (target_map_ptr && (long)map == target_map_ptr)
+ map_freed = 1;
+ return 0;
+}
+
+SEC("syscall")
+int count_ref(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long arg = 0;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 1;
+
+ num_of_refs = p->cnt.refs.counter;
+
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 560531404bce..97065a26cf70 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -1863,4 +1863,141 @@ l1_%=: r0 = 1; \
: __clobber_all);
}
+/* This test covers the bounds deduction when the u64 range and the tnum
+ * overlap only at umax. After instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0xe01 umax=0xf00 U64_MAX
+ * | [xxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * | x x | tnum values
+ *
+ * The verifier can therefore deduce that the R0=0xf0=240.
+ */
+SEC("socket")
+__description("bounds refinement with single-value tnum on umax")
+__msg("3: (15) if r0 == 0xe0 {{.*}} R0=240")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void bounds_refinement_tnum_umax(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= 0xe0; \
+ r0 &= 0xf0; \
+ if r0 == 0xe0 goto +2; \
+ if r0 == 0xf0 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test covers the bounds deduction when the u64 range and the tnum
+ * overlap only at umin. After instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0xe00 umax=0xeff U64_MAX
+ * | [xxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * | x x | tnum values
+ *
+ * The verifier can therefore deduce that the R0=0xe0=224.
+ */
+SEC("socket")
+__description("bounds refinement with single-value tnum on umin")
+__msg("3: (15) if r0 == 0xf0 {{.*}} R0=224")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void bounds_refinement_tnum_umin(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= 0xe0; \
+ r0 &= 0xf0; \
+ if r0 == 0xf0 goto +2; \
+ if r0 == 0xe0 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test covers the bounds deduction when the only possible tnum value is
+ * in the middle of the u64 range. After instruction 3, the ranges look as
+ * follows:
+ *
+ * 0 umin=0x7cf umax=0x7df U64_MAX
+ * | [xxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * | x x x x x | tnum values
+ * | +--- 0x7e0
+ * +--- 0x7d0
+ *
+ * Since the lower four bits are zero, the tnum and the u64 range only overlap
+ * in R0=0x7d0=2000. Instruction 5 is therefore dead code.
+ */
+SEC("socket")
+__description("bounds refinement with single-value tnum in middle of range")
+__msg("3: (a5) if r0 < 0x7cf {{.*}} R0=2000")
+__success __log_level(2)
+__naked void bounds_refinement_tnum_middle(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ if r0 & 0x0f goto +4; \
+ if r0 > 0x7df goto +3; \
+ if r0 < 0x7cf goto +2; \
+ if r0 == 0x7d0 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test cover the negative case for the tnum/u64 overlap. Since
+ * they contain the same two values (i.e., {0, 1}), we can't deduce
+ * anything more.
+ */
+SEC("socket")
+__description("bounds refinement: several overlaps between tnum and u64")
+__msg("2: (25) if r0 > 0x1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=1,var_off=(0x0; 0x1))")
+__failure __log_level(2)
+__naked void bounds_refinement_several_overlaps(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ if r0 < 0 goto +3; \
+ if r0 > 1 goto +2; \
+ if r0 == 1 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test cover the negative case for the tnum/u64 overlap. Since
+ * they overlap in the two values contained by the u64 range (i.e.,
+ * {0xf, 0x10}), we can't deduce anything more.
+ */
+SEC("socket")
+__description("bounds refinement: multiple overlaps between tnum and u64")
+__msg("2: (25) if r0 > 0x10 {{.*}} R0=scalar(smin=umin=smin32=umin32=15,smax=umax=smax32=umax32=16,var_off=(0x0; 0x1f))")
+__failure __log_level(2)
+__naked void bounds_refinement_multiple_overlaps(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ if r0 < 0xf goto +3; \
+ if r0 > 0x10 goto +2; \
+ if r0 == 0x10 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 02a85dda30e6..0929f4a7bda4 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1261,14 +1261,8 @@ int get_bpf_max_tramp_links(void)
return ret;
}
-#define MAX_BACKTRACE_SZ 128
-void crash_handler(int signum)
+static void dump_crash_log(void)
{
- void *bt[MAX_BACKTRACE_SZ];
- size_t sz;
-
- sz = backtrace(bt, ARRAY_SIZE(bt));
-
fflush(stdout);
stdout = env.stdout_saved;
stderr = env.stderr_saved;
@@ -1277,12 +1271,32 @@ void crash_handler(int signum)
env.test_state->error_cnt++;
dump_test_log(env.test, env.test_state, true, false, NULL);
}
+}
+
+#define MAX_BACKTRACE_SZ 128
+
+void crash_handler(int signum)
+{
+ void *bt[MAX_BACKTRACE_SZ];
+ size_t sz;
+
+ sz = backtrace(bt, ARRAY_SIZE(bt));
+
+ dump_crash_log();
+
if (env.worker_id != -1)
fprintf(stderr, "[%d]: ", env.worker_id);
fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
backtrace_symbols_fd(bt, sz, STDERR_FILENO);
}
+#ifdef __SANITIZE_ADDRESS__
+void __asan_on_error(void)
+{
+ dump_crash_log();
+}
+#endif
+
void hexdump(const char *prefix, const void *buf, size_t len)
{
for (int i = 0; i < len; i++) {
@@ -1799,7 +1813,7 @@ static int worker_main_send_subtests(int sock, struct test_state *state)
msg.subtest_done.num = i;
- strncpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME);
+ strscpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME);
msg.subtest_done.error_cnt = subtest_state->error_cnt;
msg.subtest_done.skipped = subtest_state->skipped;
@@ -1944,13 +1958,15 @@ int main(int argc, char **argv)
.parser = parse_arg,
.doc = argp_program_doc,
};
+ int err, i;
+
+#ifndef __SANITIZE_ADDRESS__
struct sigaction sigact = {
.sa_handler = crash_handler,
.sa_flags = SA_RESETHAND,
- };
- int err, i;
-
+ };
sigaction(SIGSEGV, &sigact, NULL);
+#endif
env.stdout_saved = stdout;
env.stderr_saved = stderr;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 27db34ecf3f5..a8ae03c57bba 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1320,7 +1320,7 @@ static bool cmp_str_seq(const char *log, const char *exp)
printf("FAIL\nTestcase bug\n");
return false;
}
- strncpy(needle, exp, len);
+ memcpy(needle, exp, len);
needle[len] = 0;
q = strstr(log, needle);
if (!q) {
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 16eb37e5bad6..66af0d13751a 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -212,6 +212,7 @@ int parse_test_list_file(const char *path,
break;
}
+ free(buf);
fclose(f);
return err;
}
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index eeaab7013ca2..0e63daf83ed5 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -24,12 +24,6 @@
#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
-struct ksyms {
- struct ksym *syms;
- size_t sym_cap;
- size_t sym_cnt;
-};
-
static struct ksyms *ksyms;
static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -54,6 +48,8 @@ void free_kallsyms_local(struct ksyms *ksyms)
if (!ksyms)
return;
+ free(ksyms->filtered_syms);
+
if (!ksyms->syms) {
free(ksyms);
return;
@@ -610,7 +606,7 @@ static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
return compare_name(p1, p2->name);
}
-int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
+int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel)
{
size_t cap = 0, cnt = 0;
char *name = NULL, *ksym_name, **syms = NULL;
@@ -637,8 +633,10 @@ int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
else
f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
- if (!f)
+ if (!f) {
+ free_kallsyms_local(ksyms);
return -EINVAL;
+ }
map = hashmap__new(symbol_hash, symbol_equal, NULL);
if (IS_ERR(map)) {
@@ -679,15 +677,18 @@ int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
syms[cnt++] = ksym_name;
}
- *symsp = syms;
- *cntp = cnt;
+ ksyms->filtered_syms = syms;
+ ksyms->filtered_cnt = cnt;
+ *ksymsp = ksyms;
error:
free(name);
fclose(f);
hashmap__free(map);
- if (err)
+ if (err) {
free(syms);
+ free_kallsyms_local(ksyms);
+ }
return err;
}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index a5576b2dfc26..d5bf1433675d 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -23,7 +23,14 @@ struct ksym {
long addr;
char *name;
};
-struct ksyms;
+
+struct ksyms {
+ struct ksym *syms;
+ size_t sym_cap;
+ size_t sym_cnt;
+ char **filtered_syms;
+ size_t filtered_cnt;
+};
typedef int (*ksym_cmp_t)(const void *p1, const void *p2);
typedef int (*ksym_search_cmp_t)(const void *p1, const struct ksym *p2);
@@ -53,7 +60,7 @@ ssize_t get_rel_offset(uintptr_t addr);
int read_build_id(const char *path, char *build_id, size_t size);
-int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel);
+int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel);
int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel);
#endif
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 1be1e353d40a..75f85e0362f5 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -3378,6 +3378,8 @@ int main(int argc, char **argv)
}
}
free(env.presets[i].atoms);
+ if (env.presets[i].value.type == ENUMERATOR)
+ free(env.presets[i].value.svalue);
}
free(env.presets);
return -err;
diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c
index 595c79141cf3..a27ed663967c 100644
--- a/tools/testing/selftests/bpf/xdp_features.c
+++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -16,6 +16,7 @@
#include <network_helpers.h>
+#include "bpf_util.h"
#include "xdp_features.skel.h"
#include "xdp_features.h"
@@ -212,7 +213,7 @@ static void set_env_default(void)
env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT;
env.feature.action = -EINVAL;
env.ifindex = -ENODEV;
- strcpy(env.ifname, "unknown");
+ strscpy(env.ifname, "unknown");
make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT,
&env.dut_ctrl_addr, NULL);
make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT,
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 3d8de0d4c96a..6db3b5555a22 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -550,7 +550,7 @@ static int rxq_num(const char *ifname)
struct ifreq ifr = {
.ifr_data = (void *)&ch,
};
- strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
+ strscpy(ifr.ifr_name, ifname);
int fd, ret;
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
@@ -571,7 +571,7 @@ static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *c
struct ifreq ifr = {
.ifr_data = (void *)cfg,
};
- strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
+ strscpy(ifr.ifr_name, ifname);
int fd, ret;
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 5dff3ad53867..a56f4153c64d 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -196,7 +196,6 @@ test_add_proc()
# P<v> = set cpus.partition (0:member, 1:root, 2:isolated)
# C<l> = add cpu-list to cpuset.cpus
# X<l> = add cpu-list to cpuset.cpus.exclusive
-# S<p> = use prefix in subtree_control
# T = put a task into cgroup
# CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive
# O<c>=<v> = Write <v> to CPU online file of <c>
@@ -209,44 +208,46 @@ test_add_proc()
# sched-debug matching which includes offline CPUs and single-CPU partitions
# while the second one is for matching cpuset.cpus.isolated.
#
-SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1"
+SETUP_A123_PARTITIONS="C1-3:P1 C2-3:P1 C3:P1"
TEST_MATRIX=(
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
- " C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1"
+ " C0-1 . . C2-3 . C4-5 . . 0 A2:0-1"
" C0-1 . . C2-3 P1 . . . 0 "
- " C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 "
- " C0-1 . . C2-3 P1:S+ C1:P1 . . 0 "
- " C0-1:S+ . . C2-3 . . . P1 0 "
- " C0-1:P1 . . C2-3 S+ C1 . . 0 "
- " C0-1:P1 . . C2-3 S+ C1:P1 . . 0 "
- " C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 "
+ " C0-1 . . C2-3 P1 C0-1:P1 . . 0 "
+ " C0-1 . . C2-3 P1 C1:P1 . . 0 "
+ " C0-1 . . C2-3 . . . P1 0 "
+ " C0-1:P1 . . C2-3 . C1 . . 0 "
+ " C0-1:P1 . . C2-3 . C1:P1 . . 0 "
+ " C0-1:P1 . . C2-3 . C1:P1 . P1 0 "
" C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5"
- " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
" C0-1 . . C2-3:P1 . . . C2 0 "
" C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
- "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3"
- "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1"
- "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0"
- "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2"
- "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1"
+ " C0-3:P1 C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3"
+ " C0-3:P1 C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3"
+ " C2-3:P1 C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1"
+ " C2-3:P1 C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0"
+ " C2-3:P1 C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2"
+ " C2-3:P1 C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1"
"$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
# CPU offlining cases:
- " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3"
- "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3"
- "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3"
- "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
- "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
- "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
- "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
- "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1"
- "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1"
- "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1"
- "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1"
+ " C0-1 . . C2-3 . C4-5 . O2=0 0 A1:0-1|B1:3"
+ " C0-3:P1 C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3"
+ " C0-3:P1 C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3"
+ " C0-3:P1 C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3"
+ " C0-3:P1 C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3"
+ " C2-3:P1 C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ " C2-3:P1 C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ " C2-3:P1 C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ " C2-3:P1 C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ " C2-3:P1 C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1"
+ " C2-3:P1 C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1"
+ " C2-3:P1 C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ " C2-3:P1 C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1"
+ " C2-3:P1 C3:P2 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-2"
+ " C1-3:P1 C3:P2 . . . T:O3=0 . . 0 A1:1-2|A2:1-2 A1:P1|A2:P-2 3|"
+ " C1-3:P1 C3:P2 . . . T:O3=0 O3=1 . 0 A1:1-2|A2:3 A1:P1|A2:P2 3"
"$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
"$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
"$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1"
@@ -264,88 +265,87 @@ TEST_MATRIX=(
#
# Remote partition and cpuset.cpus.exclusive tests
#
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2"
- " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
- " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5"
- " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3"
- " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3"
+ " C0-3 C1-3 C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3"
+ " C0-3 C1-3 C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3"
+ " C0-3 C1-3 C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3"
+ " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3 C1-3 C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2"
+ " C0-3 C1-3 C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
+ " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3 C1-3 C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3"
+ " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5"
+ " C4:X0-3 X1-3 X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3"
+ " C4:X0-3 X1-3 X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3"
# Nested remote/local partition tests
- " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \
+ " C0-3 C1-3 C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \
A1:P0|A2:P1|A3:P2|B1:P1 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \
+ " C0-3 C1-3 C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \
A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \
+ " C0-3 C1-3 C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \
A1:P0|A2:P1|A3:P0|B1:P1"
- " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \
+ " C0-3 C1-3 C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \
A1:P0|A2:P1|A3:P2|B1:P1 2-4|3"
- " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \
+ " C0-4 C1-4 C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \
A1:P0|A2:P2|A3:P1 2-4|2-3"
- " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \
+ " C0-4 C1-4 C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \
A1:P0|A2:P2|A3:P1 2"
- " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+ " C0-4:X2-4 C1-4:X2-4:P2 C2-4:X4:P1 \
. . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \
A1:P0|A2:P-2|A3:P-1 ."
- " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+ " C0-4:X2-4 C1-4:X2-4:P2 C2-4:X4:P1 \
. . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \
A1:P0|A2:P2|A3:P-1 2-4"
# Remote partition offline tests
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3"
- " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|"
+ " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3"
+ " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3 C1-3 C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3"
+ " C0-3 C1-3 C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|"
# An invalidated remote partition cannot self-recover from hotplug
- " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ."
+ " C0-3 C1-3 C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ."
# cpus.exclusive.effective clearing test
- " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:"
+ " C0-3 C1-3 C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:"
# Invalid to valid remote partition transition test
- " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ."
- " C0-3:S+ C1-3:X3:P2
- . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3"
+ " C0-3 C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ."
+ " C0-3 C1-3:X3:P2 . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3"
# Invalid to valid local partition direct transition tests
- " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3"
- " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3"
- " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0"
- " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3"
+ " C1-3:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3"
+ " C1-3:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3"
+ " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0"
+ " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3"
# Local partition invalidation tests
- " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \
. . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3"
- " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \
. . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
- " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \
. . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
# Local partition CPU change tests
- " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2"
- " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3"
+ " C0-5:P2 C4-5:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2"
+ " C0-5:P2 C4-5:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3"
# cpus_allowed/exclusive_cpus update tests
- " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \
. X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \
A1:P0|A3:P-2 ."
- " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \
. X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \
A1:P0|A3:P-2 ."
- " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \
. . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \
A1:P0|A3:P2 3"
- " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3:P2 \
. . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \
A1:P0|A3:P2 3"
- " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3:P2 \
. X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \
A1:P0|A3:P-2"
@@ -356,37 +356,37 @@ TEST_MATRIX=(
#
# Adding CPUs to partition root that are not in parent's
# cpuset.cpus is allowed, but those extra CPUs are ignored.
- "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1"
+ " C2-3:P1 C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1"
# Taking away all CPUs from parent or itself if there are tasks
# will make the partition invalid.
- "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1"
- " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ " C2-3:P1 C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1"
+ " C3:P1 C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1"
"$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
"$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
# Changing a partition root to member makes child partitions invalid
- "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1"
+ " C2-3:P1 C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1"
"$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1"
# cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
# as they overlap.
- "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ " C2-3:P1 . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
# Deletion of CPUs distributed to child cgroup is allowed.
- "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5"
+ " C0-1:P1 C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5"
# To become a valid partition root, cpuset.cpus must overlap parent's
# cpuset.cpus.
- " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1"
+ " C0-1:P1 . . C2-3 . C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1"
# Enabling partition with child cpusets is allowed
- " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1"
+ " C0-1 C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1"
# A partition root with non-partition root parent is invalid| but it
# can be made valid if its parent becomes a partition root too.
- " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2"
- " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1"
+ " C0-1 C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2"
+ " C0-1 C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1"
# A non-exclusive cpuset.cpus change will not invalidate its siblings partition.
" C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:3 A1:P1|B1:P0"
@@ -398,23 +398,23 @@ TEST_MATRIX=(
# Child partition root that try to take all CPUs from parent partition
# with tasks will remain invalid.
- " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
- " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1"
- " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
+ " C1-4:P1 P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
+ " C1-4:P1 P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1"
+ " C1-4:P1 P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
# Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't
# affect cpuset.cpus.exclusive.effective.
- " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3"
+ " C1-4:X3 C1:X3 . . . C . . 0 A2:1-4|XA2:3"
# cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive
# but creating a local partition out of it is not allowed. Similarly and change
# in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will
# invalidate it.
- " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \
+ " CX1-4 CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \
A1:P0|A2:P2:B1:P1 2-4"
- " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \
+ " CX1-4 CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \
A1:P0|A2:P2:B1:P-1 2-4"
- " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \
+ " CX1-4 CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \
A1:P0|A2:P2:B1:P-1 2-4"
# When multiple partitions with conflicting cpuset.cpus are created, the
@@ -426,14 +426,14 @@ TEST_MATRIX=(
" C1-3:X1-3 . . C4-5 . . . C1-2 0 A1:1-3|B1:1-2"
# cpuset.cpus can become empty with task in it as it inherits parent's effective CPUs
- " C1-3:S+ C2 . . . T:C . . 0 A1:1-3|A2:1-3"
+ " C1-3 C2 . . . T:C . . 0 A1:1-3|A2:1-3"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
# Failure cases:
# A task cannot be added to a partition with no cpu
- "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1"
+ " C2-3:P1 C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1"
# Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
" C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5"
@@ -465,31 +465,31 @@ REMOTE_TEST_MATRIX=(
# old-p1 old-p2 old-c11 old-c12 old-c21 old-c22
# new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS
# ------ ------ ------- ------- ------- ------- ----- ------ --------
- " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \
+ " X1-3 X4-6 X1-2 X3 X4-5 X6 \
. . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \
c11:P2|c12:P2|c21:P2|c22:P2 1-6"
- " CX1-4:S+ . X1-2:P2 C3 . . \
+ " CX1-4 . X1-2:P2 C3 . . \
. . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \
p1:P0|c11:P2|c12:P0 1-2"
- " CX1-4:S+ . X1-2:P2 . . . \
+ " CX1-4 . X1-2:P2 . . . \
X2-4 . . . . . p1:1,3-4|c11:2 \
p1:P0|c11:P2 2"
- " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \
+ " CX1-5 . X1-2:P2 X3-5:P1 . . \
X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \
p1:P0|c11:P2|c12:P1 2"
- " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ " CX1-4 . X1-2:P2 X3-4:P1 . . \
. . X2 . . . p1:1|c11:2|c12:3-4 \
p1:P0|c11:P2|c12:P1 2"
# p1 as member, will get its effective CPUs from its parent rtest
- " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ " CX1-4 . X1-2:P2 X3-4:P1 . . \
. . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \
p1:P0|c11:P2|c12:P1 1"
- " CX1-4:S+ X5-6:P1:S+ . . . . \
- . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \
+ " CX1-4 X5-6:P1 . . . . \
+ . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \
p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \
1-2,4-6|1-2,5-6"
# c12 whose cpuset.cpus CPUs are all granted to c11 will become invalid partition
- " C1-5:P1:S+ . C1-4:P1 C2-3 . . \
+ " C1-5:P1 . C1-4:P1 C2-3 . . \
. . . P1 . . p1:5|c11:1-4|c12:5 \
p1:P1|c11:P1|c12:P-1"
)
@@ -530,7 +530,6 @@ set_ctrl_state()
CGRP=$1
STATE=$2
SHOWERR=${3}
- CTRL=${CTRL:=$CONTROLLER}
HASERR=0
REDIRECT="2> $TMPMSG"
[[ -z "$STATE" || "$STATE" = '.' ]] && return 0
@@ -540,15 +539,16 @@ set_ctrl_state()
for CMD in $(echo $STATE | sed -e "s/:/ /g")
do
TFILE=$CGRP/cgroup.procs
- SFILE=$CGRP/cgroup.subtree_control
PFILE=$CGRP/cpuset.cpus.partition
CFILE=$CGRP/cpuset.cpus
XFILE=$CGRP/cpuset.cpus.exclusive
- case $CMD in
- S*) PREFIX=${CMD#?}
- COMM="echo ${PREFIX}${CTRL} > $SFILE"
+
+ # Enable cpuset controller if not enabled yet
+ [[ -f $CFILE ]] || {
+ COMM="echo +cpuset > $CGRP/../cgroup.subtree_control"
eval $COMM $REDIRECT
- ;;
+ }
+ case $CMD in
X*)
CPUS=${CMD#?}
COMM="echo $CPUS > $XFILE"
@@ -764,7 +764,7 @@ check_cgroup_states()
# only CPUs in isolated partitions as well as those that are isolated at
# boot time.
#
-# $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>}
+# $1 - expected isolated cpu list(s) <isolcpus1>{|<isolcpus2>}
# <isolcpus1> - expected sched/domains value
# <isolcpus2> - cpuset.cpus.isolated value = <isolcpus1> if not defined
#
@@ -773,6 +773,7 @@ check_isolcpus()
EXPECTED_ISOLCPUS=$1
ISCPUS=${CGROUP2}/cpuset.cpus.isolated
ISOLCPUS=$(cat $ISCPUS)
+ HKICPUS=$(cat /sys/devices/system/cpu/isolated)
LASTISOLCPU=
SCHED_DOMAINS=/sys/kernel/debug/sched/domains
if [[ $EXPECTED_ISOLCPUS = . ]]
@@ -811,6 +812,11 @@ check_isolcpus()
EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN
#
+ # The inverse of HK_TYPE_DOMAIN cpumask in $HKICPUS should match $ISOLCPUS
+ #
+ [[ "$ISOLCPUS" != "$HKICPUS" ]] && return 1
+
+ #
# Use the sched domain in debugfs to check isolated CPUs, if available
#
[[ -d $SCHED_DOMAINS ]] || return 0
@@ -947,7 +953,6 @@ check_test_results()
run_state_test()
{
TEST=$1
- CONTROLLER=cpuset
CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1"
RESET_LIST="A1/A2/A3 A1/A2 A1 B1"
I=0
@@ -1003,7 +1008,6 @@ run_state_test()
run_remote_state_test()
{
TEST=$1
- CONTROLLER=cpuset
[[ -d rtest ]] || mkdir rtest
cd rtest
echo +cpuset > cgroup.subtree_control
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
index 61e55dfbf121..e19ff8168baf 100644
--- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -37,17 +37,20 @@ FIXTURE(iterate_mount_namespaces) {
__u64 mnt_ns_id[MNT_NS_COUNT];
};
+static inline bool mntns_in_list(__u64 *mnt_ns_id, struct mnt_ns_info *info)
+{
+ for (int i = 0; i < MNT_NS_COUNT; i++) {
+ if (mnt_ns_id[i] == info->mnt_ns_id)
+ return true;
+ }
+ return false;
+}
+
FIXTURE_SETUP(iterate_mount_namespaces)
{
for (int i = 0; i < MNT_NS_COUNT; i++)
self->fd_mnt_ns[i] = -EBADF;
- /*
- * Creating a new user namespace let's us guarantee that we only see
- * mount namespaces that we did actually create.
- */
- ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
-
for (int i = 0; i < MNT_NS_COUNT; i++) {
struct mnt_ns_info info = {};
@@ -75,13 +78,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_forward)
fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC);
ASSERT_GE(fd_mnt_ns_cur, 0);
- for (;; count++) {
+ for (;;) {
struct mnt_ns_info info = {};
int fd_mnt_ns_next;
fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
if (fd_mnt_ns_next < 0 && errno == ENOENT)
break;
+ if (mntns_in_list(self->mnt_ns_id, &info))
+ count++;
ASSERT_GE(fd_mnt_ns_next, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_next;
@@ -96,13 +101,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_backwards)
fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC);
ASSERT_GE(fd_mnt_ns_cur, 0);
- for (;; count++) {
+ for (;;) {
struct mnt_ns_info info = {};
int fd_mnt_ns_prev;
fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
if (fd_mnt_ns_prev < 0 && errno == ENOENT)
break;
+ if (mntns_in_list(self->mnt_ns_id, &info))
+ count++;
ASSERT_GE(fd_mnt_ns_prev, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_prev;
@@ -125,7 +132,6 @@ TEST_F(iterate_mount_namespaces, iterate_forward)
ASSERT_GE(fd_mnt_ns_next, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_next;
- ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
}
}
@@ -144,7 +150,6 @@ TEST_F(iterate_mount_namespaces, iterate_backward)
ASSERT_GE(fd_mnt_ns_prev, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_prev;
- ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
}
}
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 16a119a4656c..4afaef01c22e 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -76,6 +76,9 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n)
memset(s, c, n);
}
+#define KSELFTEST_PRIO_TEST_F 20000
+#define KSELFTEST_PRIO_XFAIL 20001
+
#define TEST_TIMEOUT_DEFAULT 30
/* Utilities exposed to the test definitions */
@@ -465,7 +468,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n)
fixture_name##_teardown(_metadata, self, variant); \
} \
static struct __test_metadata *_##fixture_name##_##test_name##_object; \
- static void __attribute__((constructor)) \
+ static void __attribute__((constructor(KSELFTEST_PRIO_TEST_F))) \
_register_##fixture_name##_##test_name(void) \
{ \
struct __test_metadata *object = mmap(NULL, sizeof(*object), \
@@ -880,7 +883,7 @@ struct __test_xfail {
.fixture = &_##fixture_name##_fixture_object, \
.variant = &_##fixture_name##_##variant_name##_object, \
}; \
- static void __attribute__((constructor)) \
+ static void __attribute__((constructor(KSELFTEST_PRIO_XFAIL))) \
_register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
{ \
_##fixture_name##_##variant_name##_##test_name##_xfail.test = \
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 997056697ec8..a24ea64e2ae8 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -15,6 +15,7 @@ TEST_PROGS := \
big_tcp.sh \
bind_bhash.sh \
bpf_offload.py \
+ bridge_vlan_dump.sh \
broadcast_ether_dst.sh \
broadcast_pmtu.sh \
busy_poll_test.sh \
diff --git a/tools/testing/selftests/net/bridge_vlan_dump.sh b/tools/testing/selftests/net/bridge_vlan_dump.sh
new file mode 100755
index 000000000000..ad66731d2a6f
--- /dev/null
+++ b/tools/testing/selftests/net/bridge_vlan_dump.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bridge VLAN range grouping. VLANs are collapsed into a range entry in
+# the dump if they have the same per-VLAN options. These tests verify that
+# VLANs with different per-VLAN option values are not grouped together.
+
+# shellcheck disable=SC1091,SC2034,SC2154,SC2317
+source lib.sh
+
+ALL_TESTS="
+ vlan_range_neigh_suppress
+ vlan_range_mcast_max_groups
+ vlan_range_mcast_n_groups
+ vlan_range_mcast_enabled
+"
+
+setup_prepare()
+{
+ setup_ns NS
+ defer cleanup_all_ns
+
+ ip -n "$NS" link add name br0 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 1 mcast_vlan_snooping 1
+ ip -n "$NS" link set dev br0 up
+
+ ip -n "$NS" link add name dummy0 type dummy
+ ip -n "$NS" link set dev dummy0 master br0
+ ip -n "$NS" link set dev dummy0 up
+}
+
+vlan_range_neigh_suppress()
+{
+ RET=0
+
+ # Add two new consecutive VLANs for range grouping test
+ bridge -n "$NS" vlan add vid 10 dev dummy0
+ defer bridge -n "$NS" vlan del vid 10 dev dummy0
+
+ bridge -n "$NS" vlan add vid 11 dev dummy0
+ defer bridge -n "$NS" vlan del vid 11 dev dummy0
+
+ # Configure different neigh_suppress values and verify no range grouping
+ bridge -n "$NS" vlan set vid 10 dev dummy0 neigh_suppress on
+ check_err $? "Failed to set neigh_suppress for VLAN 10"
+
+ bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress off
+ check_err $? "Failed to set neigh_suppress for VLAN 11"
+
+ # Verify VLANs are not shown as a range, but individual entries exist
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+ check_fail $? "VLANs with different neigh_suppress incorrectly grouped"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$"
+ check_err $? "VLAN 10 individual entry not found"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$"
+ check_err $? "VLAN 11 individual entry not found"
+
+ # Configure same neigh_suppress value and verify range grouping
+ bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress on
+ check_err $? "Failed to set neigh_suppress for VLAN 11"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+ check_err $? "VLANs with same neigh_suppress not grouped"
+
+ log_test "VLAN range grouping with neigh_suppress"
+}
+
+vlan_range_mcast_max_groups()
+{
+ RET=0
+
+ # Add two new consecutive VLANs for range grouping test
+ bridge -n "$NS" vlan add vid 10 dev dummy0
+ defer bridge -n "$NS" vlan del vid 10 dev dummy0
+
+ bridge -n "$NS" vlan add vid 11 dev dummy0
+ defer bridge -n "$NS" vlan del vid 11 dev dummy0
+
+ # Configure different mcast_max_groups values and verify no range grouping
+ bridge -n "$NS" vlan set vid 10 dev dummy0 mcast_max_groups 100
+ check_err $? "Failed to set mcast_max_groups for VLAN 10"
+
+ bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 200
+ check_err $? "Failed to set mcast_max_groups for VLAN 11"
+
+ # Verify VLANs are not shown as a range, but individual entries exist
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+ check_fail $? "VLANs with different mcast_max_groups incorrectly grouped"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$"
+ check_err $? "VLAN 10 individual entry not found"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$"
+ check_err $? "VLAN 11 individual entry not found"
+
+ # Configure same mcast_max_groups value and verify range grouping
+ bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 100
+ check_err $? "Failed to set mcast_max_groups for VLAN 11"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+ check_err $? "VLANs with same mcast_max_groups not grouped"
+
+ log_test "VLAN range grouping with mcast_max_groups"
+}
+
+vlan_range_mcast_n_groups()
+{
+ RET=0
+
+ # Add two new consecutive VLANs for range grouping test
+ bridge -n "$NS" vlan add vid 10 dev dummy0
+ defer bridge -n "$NS" vlan del vid 10 dev dummy0
+
+ bridge -n "$NS" vlan add vid 11 dev dummy0
+ defer bridge -n "$NS" vlan del vid 11 dev dummy0
+
+ # Add different numbers of multicast groups to each VLAN
+ bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 10
+ check_err $? "Failed to add mdb entry to VLAN 10"
+ defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 10
+
+ bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 10
+ check_err $? "Failed to add second mdb entry to VLAN 10"
+ defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 10
+
+ bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 11
+ check_err $? "Failed to add mdb entry to VLAN 11"
+ defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 11
+
+ # Verify VLANs are not shown as a range due to different mcast_n_groups
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+ check_fail $? "VLANs with different mcast_n_groups incorrectly grouped"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$"
+ check_err $? "VLAN 10 individual entry not found"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$"
+ check_err $? "VLAN 11 individual entry not found"
+
+ # Add another group to VLAN 11 to match VLAN 10's count
+ bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 11
+ check_err $? "Failed to add second mdb entry to VLAN 11"
+ defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 11
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+ check_err $? "VLANs with same mcast_n_groups not grouped"
+
+ log_test "VLAN range grouping with mcast_n_groups"
+}
+
+vlan_range_mcast_enabled()
+{
+ RET=0
+
+ # Add two new consecutive VLANs for range grouping test
+ bridge -n "$NS" vlan add vid 10 dev br0 self
+ defer bridge -n "$NS" vlan del vid 10 dev br0 self
+
+ bridge -n "$NS" vlan add vid 11 dev br0 self
+ defer bridge -n "$NS" vlan del vid 11 dev br0 self
+
+ bridge -n "$NS" vlan add vid 10 dev dummy0
+ defer bridge -n "$NS" vlan del vid 10 dev dummy0
+
+ bridge -n "$NS" vlan add vid 11 dev dummy0
+ defer bridge -n "$NS" vlan del vid 11 dev dummy0
+
+ # Configure different mcast_snooping for bridge VLANs
+ # Port VLANs inherit BR_VLFLAG_MCAST_ENABLED from bridge VLANs
+ bridge -n "$NS" vlan global set dev br0 vid 10 mcast_snooping 1
+ bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 0
+
+ # Verify port VLANs are not grouped due to different mcast_enabled
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+ check_fail $? "VLANs with different mcast_enabled incorrectly grouped"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$"
+ check_err $? "VLAN 10 individual entry not found"
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$"
+ check_err $? "VLAN 11 individual entry not found"
+
+ # Configure same mcast_snooping and verify range grouping
+ bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 1
+
+ bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+ check_err $? "VLANs with same mcast_enabled not grouped"
+
+ log_test "VLAN range grouping with mcast_enabled"
+}
+
+# Verify the newest tested option is supported
+if ! bridge vlan help 2>&1 | grep -q "neigh_suppress"; then
+ echo "SKIP: iproute2 too old, missing per-VLAN neighbor suppression support"
+ exit "$ksft_skip"
+fi
+
+trap defer_scopes_cleanup EXIT
+setup_prepare
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 21026b667667..6eb7f95e70e1 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1672,6 +1672,17 @@ ipv4_withv6_fcnal()
run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1"
log_test $? 2 "IPv4 route with invalid IPv6 gateway"
+
+ # Test IPv4 route with loopback IPv6 nexthop
+ # Regression test: loopback IPv6 nexthop was misclassified as reject
+ # route, skipping nhc_pcpu_rth_output allocation, causing panic when
+ # an IPv4 route references it and triggers __mkroute_output().
+ run_cmd "$IP -6 nexthop add id 20 dev lo"
+ run_cmd "$IP ro add 172.20.20.0/24 nhid 20"
+ run_cmd "ip netns exec $me ping -c1 -W1 172.20.20.1"
+ log_test $? 1 "IPv4 route with loopback IPv6 nexthop (no crash)"
+ run_cmd "$IP ro del 172.20.20.0/24"
+ run_cmd "$IP nexthop del id 20"
}
ipv4_fcnal_runtime()
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index dc1f200aaa81..a3144d7298a5 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -104,6 +104,24 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
6 0 0 65535,
6 0 0 0"
+# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption
+# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c &&
+# (ip[66] & 0xf0) == 0x40'"
+CBPF_MPTCP_SUBOPTION_RM_ADDR="13,
+ 48 0 0 0,
+ 84 0 0 240,
+ 21 0 9 64,
+ 48 0 0 32,
+ 84 0 0 240,
+ 21 0 6 192,
+ 48 0 0 53,
+ 21 0 4 12,
+ 48 0 0 66,
+ 84 0 0 240,
+ 21 0 1 64,
+ 6 0 0 65535,
+ 6 0 0 0"
+
init_partial()
{
capout=$(mktemp)
@@ -2608,6 +2626,19 @@ remove_tests()
chk_rst_nr 0 0
fi
+ # signal+subflow with limits, remove
+ if reset "remove signal+subflow with limits"; then
+ pm_nl_set_limits $ns1 0 0
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow
+ pm_nl_set_limits $ns2 0 0
+ addr_nr_ns1=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ chk_rm_nr 1 0 invert
+ chk_rst_nr 0 0
+ fi
+
# addresses remove
if reset "remove addresses"; then
pm_nl_set_limits $ns1 3 3
@@ -4217,6 +4248,14 @@ endpoint_tests()
chk_subflow_nr "after no reject" 3
chk_mptcp_info subflows 2 subflows 2
+ # To make sure RM_ADDR are sent over a different subflow, but
+ # allow the rest to quickly and cleanly close the subflow
+ local ipt=1
+ ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \
+ -p tcp -m tcp --tcp-option 30 \
+ -m bpf --bytecode \
+ "$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
+ -j DROP || ipt=0
local i
for i in $(seq 3); do
pm_nl_del_endpoint $ns2 1 10.0.1.2
@@ -4229,6 +4268,7 @@ endpoint_tests()
chk_subflow_nr "after re-add id 0 ($i)" 3
chk_mptcp_info subflows 3 subflows 3
done
+ [ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1
mptcp_lib_kill_group_wait $tests_pid
@@ -4288,11 +4328,20 @@ endpoint_tests()
chk_mptcp_info subflows 2 subflows 2
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+ # To make sure RM_ADDR are sent over a different subflow, but
+ # allow the rest to quickly and cleanly close the subflow
+ local ipt=1
+ ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \
+ -p tcp -m tcp --tcp-option 30 \
+ -m bpf --bytecode \
+ "$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
+ -j DROP || ipt=0
pm_nl_del_endpoint $ns1 42 10.0.1.1
sleep 0.5
chk_subflow_nr "after delete ID 0" 2
chk_mptcp_info subflows 2 subflows 2
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+ [ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1
pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
wait_mpj 4
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 806aaa7d2d61..d11a8b949aab 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -237,10 +237,13 @@ run_test()
for dev in ns2eth1 ns2eth2; do
tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
done
- tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
- tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
- tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
- tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+ # keep the queued pkts number low, or the RTT estimator will see
+ # increasing latency over time.
+ tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50
+ tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50
+ tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50
# time is measured in ms, account for transfer size, aggregated link speed
# and header overhead (10%)
diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c
index 9e56b9d47037..116c0ca0eabb 100644
--- a/tools/testing/selftests/net/netfilter/nf_queue.c
+++ b/tools/testing/selftests/net/netfilter/nf_queue.c
@@ -18,6 +18,7 @@
struct options {
bool count_packets;
bool gso_enabled;
+ bool failopen;
int verbose;
unsigned int queue_num;
unsigned int timeout;
@@ -30,7 +31,7 @@ static struct options opts;
static void help(const char *p)
{
- printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
+ printf("Usage: %s [-c|-v [-vv] ] [-o] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -236,6 +237,8 @@ struct mnl_socket *open_queue(void)
flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
flags |= NFQA_CFG_F_UID_GID;
+ if (opts.failopen)
+ flags |= NFQA_CFG_F_FAIL_OPEN;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@@ -329,7 +332,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
+ while ((c = getopt(argc, argv, "chvot:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
@@ -366,6 +369,9 @@ static void parse_opts(int argc, char **argv)
case 'G':
opts.gso_enabled = false;
break;
+ case 'o':
+ opts.failopen = true;
+ break;
case 'v':
opts.verbose++;
break;
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 139bc1211878..ea766bdc5d04 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -591,6 +591,7 @@ EOF
test_udp_gro_ct()
{
local errprefix="FAIL: test_udp_gro_ct:"
+ local timeout=5
ip netns exec "$nsrouter" conntrack -F 2>/dev/null
@@ -630,10 +631,10 @@ table inet udpq {
}
}
EOF
- timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
+ timeout "$timeout" ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
local rpid=$!
- ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" &
+ ip netns exec "$nsrouter" nice -n -19 ./nf_queue -G -c -q 1 -o -t 2 > "$TMPFILE2" &
local nfqpid=$!
ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on
@@ -643,8 +644,12 @@ EOF
local bs=512
local count=$(((32 * 1024 * 1024) / bs))
- dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do
- timeout 5 ip netns exec "$ns1" \
+
+ local nprocs=$(nproc)
+ [ $nprocs -gt 1 ] && nprocs=$((nprocs - 1))
+
+ dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 $nprocs); do
+ timeout "$timeout" nice -n 19 ip netns exec "$ns1" \
socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 &
done
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
index 3848b419e68c..6c0f32c40f19 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -38,7 +38,7 @@
// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
+0 < P. 4001:54001(50000) ack 1 win 257
- +0 > . 1:1(0) ack 54001 win 0
+ * > . 1:1(0) ack 54001 win 0
// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
+0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
deleted file mode 100644
index f575c0ff89da..000000000000
--- a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
---mss=1000
-
-`./defaults.sh`
-
- 0 `nstat -n`
-
-// Establish a connection.
- +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
- +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
- +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
- +0 bind(3, ..., ...) = 0
- +0 listen(3, 1) = 0
-
- +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
- +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
- +.1 < . 1:1(0) ack 1 win 257
-
- +0 accept(3, ..., ...) = 4
-
- +0 < P. 1:20001(20000) ack 1 win 257
- +.04 > . 1:1(0) ack 20001 win 18000
-
- +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
- +0 < P. 20001:80001(60000) ack 1 win 257
- +0 > . 1:1(0) ack 20001 win 18000
-
- +0 read(4, ..., 20000) = 20000
-// A too big packet is accepted if the receive queue is empty
- +0 < P. 20001:80001(60000) ack 1 win 257
- +0 > . 1:1(0) ack 80001 win 0
-
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index 8a5cd5cb5472..cf106a49b55e 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -944,8 +944,8 @@ TEST_F(tun_vnet_udptnl, send_gso_packet)
ASSERT_EQ(ret, off);
ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss);
- ASSERT_EQ(ret, variant->data_size);
- ASSERT_EQ(r_num_mss, variant->r_num_mss);
+ EXPECT_EQ(ret, variant->data_size);
+ EXPECT_EQ(r_num_mss, variant->r_num_mss);
}
TEST_F(tun_vnet_udptnl, recv_gso_packet)
@@ -955,18 +955,18 @@ TEST_F(tun_vnet_udptnl, recv_gso_packet)
int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
ret = send_gso_packet_into_tunnel(self, variant);
- ASSERT_EQ(ret, variant->data_size);
+ EXPECT_EQ(ret, variant->data_size);
memset(&vnet_hdr, 0, sizeof(vnet_hdr));
ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr);
- ASSERT_EQ(ret, variant->data_size);
+ EXPECT_EQ(ret, variant->data_size);
if (!variant->no_gso) {
- ASSERT_EQ(vh->gso_size, variant->gso_size);
+ EXPECT_EQ(vh->gso_size, variant->gso_size);
gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ?
(VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) :
(VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6);
- ASSERT_EQ(vh->gso_type, gso_type);
+ EXPECT_EQ(vh->gso_type, gso_type);
}
}
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 2c601a7eaff5..006300ac6dff 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -93,6 +93,8 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \
$(CLANG_SYS_INCLUDES) \
-Wall -Wno-compare-distinct-pointer-types \
-Wno-incompatible-function-pointer-types \
+ -Wno-microsoft-anon-tag \
+ -fms-extensions \
-O2 -mcpu=v3
# sort removes libbpf duplicates when not cross-building
diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c
index 82c71653977b..44577e30e764 100644
--- a/tools/testing/selftests/sched_ext/init_enable_count.c
+++ b/tools/testing/selftests/sched_ext/init_enable_count.c
@@ -57,7 +57,8 @@ static enum scx_test_status run_test(bool global)
char buf;
close(pipe_fds[1]);
- read(pipe_fds[0], &buf, 1);
+ if (read(pipe_fds[0], &buf, 1) < 0)
+ exit(1);
close(pipe_fds[0]);
exit(0);
}
diff --git a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
index a3faf5bb49d6..784f2f6c1af9 100644
--- a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
+++ b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
@@ -58,14 +58,14 @@ static void record_peek_result(long pid)
{
u32 slot_key;
long *slot_pid_ptr;
- int ix;
+ u32 ix;
if (pid <= 0)
return;
/* Find an empty slot or one with the same PID */
bpf_for(ix, 0, 10) {
- slot_key = (pid + ix) % MAX_SAMPLES;
+ slot_key = ((u64)pid + ix) % MAX_SAMPLES;
slot_pid_ptr = bpf_map_lookup_elem(&peek_results, &slot_key);
if (!slot_pid_ptr)
continue;
diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c
index ab772e336f86..81ea9b4883e5 100644
--- a/tools/testing/selftests/sched_ext/rt_stall.c
+++ b/tools/testing/selftests/sched_ext/rt_stall.c
@@ -15,7 +15,6 @@
#include <signal.h>
#include <bpf/bpf.h>
#include <scx/common.h>
-#include <unistd.h>
#include "rt_stall.bpf.skel.h"
#include "scx_test.h"
#include "../kselftest.h"
diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c
index 5748d2c69903..761c21f96404 100644
--- a/tools/testing/selftests/sched_ext/runner.c
+++ b/tools/testing/selftests/sched_ext/runner.c
@@ -166,6 +166,9 @@ int main(int argc, char **argv)
enum scx_test_status status;
struct scx_test *test = &__scx_tests[i];
+ if (exit_req)
+ break;
+
if (list) {
printf("%s\n", test->name);
if (i == (__scx_num_tests - 1))
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 7d07c55bb668..33bb8f3ff8ed 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -505,5 +505,164 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "8883",
+ "name": "Try to attach act_ct to an ets qdisc",
+ "category": [
+ "actions",
+ "ct"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ct",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 root handle 1: ets bands 2"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: prio 1 protocol ip matchall action ct index 42",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j filter ls dev $DEV1 parent 1: prio 1 protocol ip",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DEV1 root"
+ ]
+ },
+ {
+ "id": "3b10",
+ "name": "Attach act_ct to an ingress qdisc",
+ "category": [
+ "actions",
+ "ct"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ct",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress prio 1 protocol ip matchall action ct index 42",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j filter ls dev $DEV1 ingress prio 1 protocol ip",
+ "matchJSON": [
+ {
+ "kind": "matchall"
+ },
+ {
+ "options": {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "ct",
+ "index": 42,
+ "ref": 1,
+ "bind": 1
+ }
+ ]
+ }
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0337",
+ "name": "Attach act_ct to a clsact/egress qdisc",
+ "category": [
+ "actions",
+ "ct"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ct",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 clsact"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 egress prio 1 protocol ip matchall action ct index 42",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j filter ls dev $DEV1 egress prio 1 protocol ip",
+ "matchJSON": [
+ {
+ "kind": "matchall"
+ },
+ {
+ "options": {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "ct",
+ "index": 42,
+ "ref": 1,
+ "bind": 1
+ }
+ ]
+ }
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DEV1 clsact"
+ ]
+ },
+ {
+ "id": "4f60",
+ "name": "Attach act_ct to a shared block",
+ "category": [
+ "actions",
+ "ct"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ct",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact"
+ ],
+ "cmdUnderTest": "$TC filter add block 21 prio 1 protocol ip matchall action ct index 42",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j filter ls block 21 prio 1 protocol ip",
+ "matchJSON": [
+ {
+ "kind": "matchall"
+ },
+ {
+ "options": {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "ct",
+ "index": 42,
+ "ref": 1,
+ "bind": 1
+ }
+ ]
+ }
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
index f587a32e44c4..808aef4afe22 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -1279,5 +1279,104 @@
"teardown": [
"$TC actions flush action ife"
]
+ },
+ {
+ "id": "f2a0",
+ "name": "Update decode ife action with encode metadata",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife decode index 10"
+ ],
+ "cmdUnderTest": "$TC actions replace action ife encode use tcindex 1 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action ife index 10",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "ife",
+ "mode": "encode",
+ "control_action": {
+ "type": "pipe"
+ },
+ "type": "0xed3e",
+ "tcindex": 1,
+ "index": 10,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d352",
+ "name": "Update decode ife action into encode with multiple metadata",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife decode index 10"
+ ],
+ "cmdUnderTest": "$TC actions replace action ife encode use tcindex 1 use mark 22 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action ife index 10",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "ife",
+ "mode": "encode",
+ "control_action": {
+ "type": "pipe"
+ },
+ "type": "0xed3e",
+ "tcindex": 1,
+ "mark": 22,
+ "index": 10,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
index e06f03c0fb5d..adb52fe3acc1 100644
--- a/tools/testing/selftests/tc-testing/tdc_helper.py
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -38,10 +38,14 @@ def list_test_cases(testlist):
def list_categories(testlist):
- """ Show all categories that are present in a test case file. """
- categories = set(map(lambda x: x['category'], testlist))
+ """Show all unique categories present in the test cases."""
+ categories = set()
+ for t in testlist:
+ if 'category' in t:
+ categories.update(t['category'])
+
print("Available categories:")
- print(", ".join(str(s) for s in categories))
+ print(", ".join(sorted(categories)))
print("")
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 267c7369c765..794976b88c6f 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -5,6 +5,7 @@ config KVM_COMMON
bool
select EVENTFD
select INTERVAL_TREE
+ select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
config HAVE_KVM_PFNCACHE
@@ -93,24 +94,16 @@ config HAVE_KVM_PM_NOTIFIER
config KVM_GENERIC_HARDWARE_ENABLING
bool
-config KVM_GENERIC_MMU_NOTIFIER
- select MMU_NOTIFIER
- bool
-
config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
- depends on KVM_GENERIC_MMU_NOTIFIER
bool
config KVM_MMU_LOCKLESS_AGING
- depends on KVM_GENERIC_MMU_NOTIFIER
bool
config KVM_GENERIC_MEMORY_ATTRIBUTES
- depends on KVM_GENERIC_MMU_NOTIFIER
bool
config KVM_GUEST_MEMFD
- depends on KVM_GENERIC_MMU_NOTIFIER
select XARRAY_MULTI
bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 22f8a672e1fd..1bc1da66b4b0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -502,7 +502,6 @@ void kvm_destroy_vcpus(struct kvm *kvm)
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_destroy_vcpus);
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
{
return container_of(mn, struct kvm, mmu_notifier);
@@ -901,15 +900,6 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
}
-#else /* !CONFIG_KVM_GENERIC_MMU_NOTIFIER */
-
-static int kvm_init_mmu_notifier(struct kvm *kvm)
-{
- return 0;
-}
-
-#endif /* CONFIG_KVM_GENERIC_MMU_NOTIFIER */
-
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
static int kvm_pm_notifier_call(struct notifier_block *bl,
unsigned long state,
@@ -1226,10 +1216,8 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
out_err_no_debugfs:
kvm_coalesced_mmio_free(kvm);
out_no_coalesced_mmio:
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
if (kvm->mmu_notifier.ops)
mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
-#endif
out_err_no_mmu_notifier:
kvm_disable_virtualization();
out_err_no_disable:
@@ -1292,7 +1280,6 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm->buses[i] = NULL;
}
kvm_coalesced_mmio_free(kvm);
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
/*
* At this point, pending calls to invalidate_range_start()
@@ -1311,9 +1298,6 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm->mn_active_invalidate_count = 0;
else
WARN_ON(kvm->mmu_invalidate_in_progress);
-#else
- kvm_flush_shadow_all(kvm);
-#endif
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
@@ -4886,6 +4870,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
{
switch (arg) {
+ case KVM_CAP_SYNC_MMU:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_USER_MEMORY2:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: