summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap2
-rw-r--r--Documentation/PCI/pcieaer-howto.rst10
-rw-r--r--Documentation/core-api/dma-attributes.rst38
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip-spdif.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-sai.yaml2
-rw-r--r--Documentation/filesystems/overlayfs.rst50
-rw-r--r--Documentation/hwmon/adm1177.rst8
-rw-r--r--Documentation/hwmon/peci-cputemp.rst10
-rw-r--r--Documentation/userspace-api/landlock.rst23
-rw-r--r--MAINTAINERS23
-rw-r--r--Makefile4
-rw-r--r--arch/arm64/kvm/at.c2
-rw-r--r--arch/arm64/kvm/reset.c14
-rw-r--r--arch/loongarch/include/asm/linkage.h36
-rw-r--r--arch/loongarch/include/asm/sigframe.h9
-rw-r--r--arch/loongarch/kernel/asm-offsets.c2
-rw-r--r--arch/loongarch/kernel/env.c7
-rw-r--r--arch/loongarch/kernel/signal.c6
-rw-r--r--arch/loongarch/kvm/intc/eiointc.c16
-rw-r--r--arch/loongarch/kvm/vcpu.c3
-rw-r--r--arch/loongarch/pci/pci.c80
-rw-r--r--arch/loongarch/vdso/Makefile4
-rw-r--r--arch/loongarch/vdso/sigreturn.S6
-rw-r--r--arch/s390/include/asm/barrier.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h3
-rw-r--r--arch/s390/include/asm/stacktrace.h2
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/entry.S7
-rw-r--r--arch/s390/kernel/nmi.c4
-rw-r--r--arch/s390/kernel/syscall.c5
-rw-r--r--arch/s390/kvm/dat.c100
-rw-r--r--arch/s390/kvm/dat.h23
-rw-r--r--arch/s390/kvm/gaccess.c77
-rw-r--r--arch/s390/kvm/gmap.c160
-rw-r--r--arch/s390/kvm/gmap.h33
-rw-r--r--arch/s390/kvm/interrupt.c18
-rw-r--r--arch/s390/kvm/kvm-s390.c34
-rw-r--r--arch/s390/kvm/vsie.c12
-rw-r--r--arch/s390/mm/fault.c11
-rw-r--r--arch/x86/coco/sev/noinstr.c6
-rw-r--r--arch/x86/entry/entry_fred.c14
-rw-r--r--arch/x86/kernel/cpu/common.c33
-rw-r--r--arch/x86/kvm/mmu/mmu.c17
-rw-r--r--arch/x86/platform/efi/quirks.c2
-rw-r--r--crypto/af_alg.c53
-rw-r--r--crypto/algif_aead.c100
-rw-r--r--crypto/algif_skcipher.c6
-rw-r--r--crypto/authencesn.c48
-rw-r--r--crypto/deflate.c11
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h1
-rw-r--r--drivers/accel/ivpu/ivpu_hw.c6
-rw-r--r--drivers/acpi/ec.c2
-rw-r--r--drivers/auxdisplay/lcd2s.c15
-rw-r--r--drivers/auxdisplay/line-display.c2
-rw-r--r--drivers/base/regmap/regmap.c30
-rw-r--r--drivers/block/zram/zram_drv.c39
-rw-r--r--drivers/bluetooth/btintel.c11
-rw-r--r--drivers/bluetooth/btusb.c5
-rw-r--r--drivers/bluetooth/hci_h4.c3
-rw-r--r--drivers/bluetooth/hci_ll.c2
-rw-r--r--drivers/cpufreq/cpufreq.c9
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c12
-rw-r--r--drivers/cpufreq/cpufreq_governor.c3
-rw-r--r--drivers/cpufreq/cpufreq_governor.h1
-rw-r--r--drivers/cpufreq/freq_table.c4
-rw-r--r--drivers/crypto/caam/caamalg_qi2.c3
-rw-r--r--drivers/crypto/caam/caamhash.c3
-rw-r--r--drivers/crypto/tegra/tegra-se-aes.c11
-rw-r--r--drivers/crypto/tegra/tegra-se-hash.c30
-rw-r--r--drivers/cxl/Kconfig1
-rw-r--r--drivers/cxl/core/hdm.c25
-rw-r--r--drivers/cxl/core/mbox.c2
-rw-r--r--drivers/cxl/core/port.c8
-rw-r--r--drivers/cxl/core/region.c4
-rw-r--r--drivers/cxl/pmem.c2
-rw-r--r--drivers/dma/dw-edma/dw-edma-core.c8
-rw-r--r--drivers/dma/dw-edma/dw-hdma-v0-core.c6
-rw-r--r--drivers/dma/fsl-edma-main.c26
-rw-r--r--drivers/dma/idxd/cdev.c8
-rw-r--r--drivers/dma/idxd/device.c45
-rw-r--r--drivers/dma/idxd/dma.c18
-rw-r--r--drivers/dma/idxd/idxd.h1
-rw-r--r--drivers/dma/idxd/init.c14
-rw-r--r--drivers/dma/idxd/irq.c16
-rw-r--r--drivers/dma/idxd/submit.c2
-rw-r--r--drivers/dma/idxd/sysfs.c1
-rw-r--r--drivers/dma/sh/rz-dmac.c66
-rw-r--r--drivers/dma/xilinx/xdma.c4
-rw-r--r--drivers/dma/xilinx/xilinx_dma.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c6
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c10
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c6
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c33
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c21
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c33
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c33
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c50
-rw-r--r--drivers/gpu/drm/drm_syncobj.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c8
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_tunnel.c20
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_tunnel.h11
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.c11
-rw-r--r--drivers/gpu/drm/i915/i915_wait_util.h2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dsi.c9
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c12
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.c2
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c22
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c3
-rw-r--r--drivers/hwmon/adm1177.c54
-rw-r--r--drivers/hwmon/peci/cputemp.c4
-rw-r--r--drivers/hwmon/pmbus/ina233.c3
-rw-r--r--drivers/hwmon/pmbus/isl68137.c21
-rw-r--r--drivers/hwmon/pmbus/pmbus_core.c192
-rw-r--r--drivers/i2c/busses/i2c-designware-amdisp.c11
-rw-r--r--drivers/i2c/busses/i2c-imx.c51
-rw-r--r--drivers/infiniband/core/rw.c37
-rw-r--r--drivers/infiniband/core/umem.c5
-rw-r--r--drivers/infiniband/hw/bng_re/bng_dev.c14
-rw-r--r--drivers/infiniband/hw/efa/efa_com.c88
-rw-r--r--drivers/infiniband/hw/ionic/ionic_controlpath.c4
-rw-r--r--drivers/infiniband/hw/irdma/cm.c29
-rw-r--r--drivers/infiniband/hw/irdma/uk.c39
-rw-r--r--drivers/infiniband/hw/irdma/utils.c2
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c10
-rw-r--r--drivers/iommu/dma-iommu.c21
-rw-r--r--drivers/iommu/generic_pt/fmt/amdv1.h2
-rw-r--r--drivers/iommu/generic_pt/iommu_pt.h2
-rw-r--r--drivers/irqchip/irq-qcom-mpm.c3
-rw-r--r--drivers/irqchip/irq-renesas-rzv2h.c2
-rw-r--r--drivers/media/i2c/ccs/ccs-core.c2
-rw-r--r--drivers/media/mc/mc-request.c5
-rw-r--r--drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c4
-rw-r--r--drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c50
-rw-r--r--drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c3
-rw-r--r--drivers/media/platform/synopsys/Kconfig1
-rw-r--r--drivers/media/platform/synopsys/dw-mipi-csi2rx.c2
-rw-r--r--drivers/media/platform/verisilicon/imx8m_vpu_hw.c2
-rw-r--r--drivers/media/usb/uvc/uvc_video.c9
-rw-r--r--drivers/media/v4l2-core/v4l2-ioctl.c5
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/can/dev/netlink.c4
-rw-r--r--drivers/net/can/spi/mcp251x.c29
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.c20
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.h1
-rw-r--r--drivers/net/ethernet/airoha/airoha_ppe.c9
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig2
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.c41
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c76
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c5
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c5
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c2
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c41
-rw-r--r--drivers/net/ethernet/cadence/macb_pci.c10
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c28
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c13
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc4_hw.h11
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc4_pf.c118
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ethtool.c12
-rw-r--r--drivers/net/ethernet/freescale/fec_ptp.c3
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c31
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h22
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c32
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_repr.c5
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf.h2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_idc.c6
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_virtchnl.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_offload.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h4
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c2
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_txrx.c6
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_txrx.h2
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c5
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c13
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c17
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c14
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_common.c6
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet.h4
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c9
-rw-r--r--drivers/net/phy/sfp.c7
-rw-r--r--drivers/net/team/team_core.c65
-rw-r--r--drivers/net/tun_vnet.h2
-rw-r--r--drivers/net/virtio_net.c27
-rw-r--r--drivers/net/vxlan/vxlan_core.c6
-rw-r--r--drivers/net/wireless/ath/ath11k/dp_rx.c15
-rw-r--r--drivers/net/wireless/ath/ath12k/dp_rx.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/commands.h5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/scan.h10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/iface.c101
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/mac80211.c19
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/mld.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/mlo.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/notif.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/scan.c30
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/scan.h9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c3
-rw-r--r--drivers/net/wireless/microchip/wilc1000/hif.c2
-rw-r--r--drivers/net/wireless/ti/wl1251/tx.c8
-rw-r--r--drivers/net/wireless/virtual/virt_wifi.c1
-rw-r--r--drivers/nfc/pn533/uart.c3
-rw-r--r--drivers/pci/pwrctrl/core.c4
-rw-r--r--drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c12
-rw-r--r--drivers/pci/pwrctrl/slot.c1
-rw-r--r--drivers/phy/Kconfig5
-rw-r--r--drivers/phy/freescale/phy-fsl-lynx-28g.c2
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-ufs.c3
-rw-r--r--drivers/phy/spacemit/phy-k1-usb2.c14
-rw-r--r--drivers/phy/ti/phy-j721e-wiz.c2
-rw-r--r--drivers/pinctrl/mediatek/pinctrl-mtk-common.c9
-rw-r--r--drivers/pinctrl/qcom/pinctrl-spmi-gpio.c16
-rw-r--r--drivers/pinctrl/renesas/pinctrl-rza1.c2
-rw-r--r--drivers/pinctrl/renesas/pinctrl-rzt2h.c15
-rw-r--r--drivers/pinctrl/stm32/Kconfig1
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sunxi.c43
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sunxi.h4
-rw-r--r--drivers/platform/olpc/olpc-xo175-ec.c2
-rw-r--r--drivers/platform/x86/amd/hsmp/hsmp.c2
-rw-r--r--drivers/platform/x86/asus-armoury.h77
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c2
-rw-r--r--drivers/platform/x86/hp/hp-wmi.c19
-rw-r--r--drivers/platform/x86/intel/hid.c10
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c5
-rw-r--r--drivers/platform/x86/lenovo/wmi-gamezone.c2
-rw-r--r--drivers/scsi/ibmvscsi/ibmvfc.c3
-rw-r--r--drivers/scsi/scsi_transport_sas.c2
-rw-r--r--drivers/scsi/ses.c2
-rw-r--r--drivers/spi/spi-fsl-lpspi.c3
-rw-r--r--drivers/spi/spi-meson-spicc.c2
-rw-r--r--drivers/spi/spi-sn-f-ospi.c42
-rw-r--r--drivers/spi/spi.c32
-rw-r--r--drivers/target/loopback/tcm_loop.c52
-rw-r--r--drivers/target/target_core_file.c2
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c8
-rw-r--r--drivers/vfio/pci/vfio_pci_dmabuf.c5
-rw-r--r--drivers/virt/coco/tdx-guest/tdx-guest.c12
-rw-r--r--drivers/virtio/virtio_ring.c10
-rw-r--r--drivers/xen/privcmd.c76
-rw-r--r--fs/btrfs/block-group.c2
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/tree-log.c98
-rw-r--r--fs/btrfs/volumes.c5
-rw-r--r--fs/btrfs/zlib.c4
-rw-r--r--fs/erofs/Kconfig45
-rw-r--r--fs/erofs/fileio.c6
-rw-r--r--fs/erofs/ishare.c15
-rw-r--r--fs/erofs/zdata.c3
-rw-r--r--fs/ext4/Makefile5
-rw-r--r--fs/ext4/crypto.c9
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/ext4_extents.h12
-rw-r--r--fs/ext4/extents-test.c12
-rw-r--r--fs/ext4/extents.c80
-rw-r--r--fs/ext4/fast_commit.c17
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inline.c10
-rw-r--r--fs/ext4/inode.c75
-rw-r--r--fs/ext4/mballoc-test.c81
-rw-r--r--fs/ext4/mballoc.c132
-rw-r--r--fs/ext4/mballoc.h30
-rw-r--r--fs/ext4/page-io.c10
-rw-r--r--fs/ext4/super.c37
-rw-r--r--fs/ext4/sysfs.c10
-rw-r--r--fs/fs-writeback.c36
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/iomap/bio.c51
-rw-r--r--fs/iomap/buffered-io.c15
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/mpage.c30
-rw-r--r--fs/namei.c10
-rw-r--r--fs/netfs/buffered_read.c3
-rw-r--r--fs/netfs/direct_read.c3
-rw-r--r--fs/netfs/direct_write.c15
-rw-r--r--fs/netfs/iterator.c43
-rw-r--r--fs/netfs/read_collect.c4
-rw-r--r--fs/netfs/read_retry.c5
-rw-r--r--fs/netfs/read_single.c1
-rw-r--r--fs/netfs/write_collect.c4
-rw-r--r--fs/netfs/write_issue.c3
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/overlayfs.h21
-rw-r--r--fs/overlayfs/ovl_entry.h7
-rw-r--r--fs/overlayfs/params.c33
-rw-r--r--fs/overlayfs/super.c2
-rw-r--r--fs/overlayfs/util.c5
-rw-r--r--fs/smb/client/Makefile7
-rw-r--r--fs/smb/client/fs_context.c4
-rw-r--r--fs/smb/server/oplock.c72
-rw-r--r--fs/smb/server/smb2pdu.c194
-rw-r--r--fs/smb/server/smbacl.c43
-rw-r--r--fs/smb/server/smbacl.h2
-rw-r--r--fs/udf/inode.c33
-rw-r--r--fs/xfs/libxfs/xfs_attr.h3
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c22
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h3
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c53
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h2
-rw-r--r--fs/xfs/scrub/quota.c4
-rw-r--r--fs/xfs/scrub/trace.h12
-rw-r--r--fs/xfs/xfs_attr_inactive.c95
-rw-r--r--fs/xfs/xfs_attr_item.c51
-rw-r--r--fs/xfs/xfs_dquot_item.c9
-rw-r--r--fs/xfs/xfs_handle.c2
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_inode_item.c9
-rw-r--r--fs/xfs/xfs_mount.c7
-rw-r--r--fs/xfs/xfs_trace.h47
-rw-r--r--fs/xfs/xfs_trans_ail.c127
-rw-r--r--fs/xfs/xfs_verify_media.c18
-rw-r--r--fs/xfs/xfs_xattr.c2
-rw-r--r--include/crypto/if_alg.h5
-rw-r--r--include/linux/bpf.h4
-rw-r--r--include/linux/cgroup-defs.h3
-rw-r--r--include/linux/damon.h6
-rw-r--r--include/linux/dma-mapping.h19
-rw-r--r--include/linux/fs/super_types.h1
-rw-r--r--include/linux/iommu.h3
-rw-r--r--include/linux/leafops.h32
-rw-r--r--include/linux/mempolicy.h1
-rw-r--r--include/linux/mpage.h11
-rw-r--r--include/linux/netfilter/ipset/ip_set.h2
-rw-r--r--include/linux/netfs.h1
-rw-r--r--include/linux/pagemap.h11
-rw-r--r--include/linux/security.h1
-rw-r--r--include/linux/skbuff.h1
-rw-r--r--include/linux/spi/spi.h5
-rw-r--r--include/linux/srcutiny.h4
-rw-r--r--include/linux/srcutree.h9
-rw-r--r--include/linux/tracepoint.h20
-rw-r--r--include/linux/virtio_net.h53
-rw-r--r--include/net/bluetooth/l2cap.h1
-rw-r--r--include/net/codel_impl.h1
-rw-r--r--include/net/inet_hashtables.h14
-rw-r--r--include/net/ip6_fib.h21
-rw-r--r--include/net/netfilter/nf_conntrack_core.h5
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h20
-rw-r--r--include/net/netns/mpls.h1
-rw-r--r--include/net/netns/xfrm.h2
-rw-r--r--include/sound/sdca_function.h5
-rw-r--r--include/trace/events/btrfs.h11
-rw-r--r--include/trace/events/dma.h4
-rw-r--r--include/trace/events/netfs.h8
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h4
-rw-r--r--init/Kconfig2
-rw-r--r--io_uring/fdinfo.c4
-rw-r--r--kernel/bpf/syscall.c25
-rw-r--r--kernel/bpf/verifier.c37
-rw-r--r--kernel/cgroup/cgroup.c88
-rw-r--r--kernel/cgroup/cpuset.c29
-rw-r--r--kernel/dma/debug.c9
-rw-r--r--kernel/dma/direct.h7
-rw-r--r--kernel/dma/mapping.c6
-rw-r--r--kernel/dma/swiotlb.c21
-rw-r--r--kernel/futex/core.c2
-rw-r--r--kernel/futex/pi.c3
-rw-r--r--kernel/futex/syscalls.c8
-rw-r--r--kernel/power/main.c2
-rw-r--r--kernel/power/snapshot.c11
-rw-r--r--kernel/rcu/rcu.h9
-rw-r--r--kernel/rcu/srcutiny.c19
-rw-r--r--kernel/rcu/srcutree.c211
-rw-r--r--kernel/sched/ext.c95
-rw-r--r--kernel/sched/ext_idle.c2
-rw-r--r--kernel/sched/sched.h3
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/time/alarmtimer.c2
-rw-r--r--kernel/trace/bpf_trace.c4
-rw-r--r--kernel/trace/trace_events_trigger.c79
-rw-r--r--kernel/trace/trace_osnoise.c10
-rw-r--r--kernel/workqueue.c25
-rw-r--r--lib/bug.c7
-rw-r--r--lib/crypto/chacha-block-generic.c4
-rw-r--r--mm/damon/core.c8
-rw-r--r--mm/damon/stat.c53
-rw-r--r--mm/damon/sysfs.c10
-rw-r--r--mm/hmm.c4
-rw-r--r--mm/memory.c18
-rw-r--r--mm/mempolicy.c10
-rw-r--r--mm/mseal.c3
-rw-r--r--mm/pagewalk.c25
-rw-r--r--mm/rmap.c7
-rw-r--r--mm/swap_state.c9
-rw-r--r--mm/zswap.c8
-rw-r--r--net/bluetooth/hci_conn.c10
-rw-r--r--net/bluetooth/hci_core.c2
-rw-r--r--net/bluetooth/hci_event.c127
-rw-r--r--net/bluetooth/hci_sync.c108
-rw-r--r--net/bluetooth/l2cap_core.c71
-rw-r--r--net/bluetooth/l2cap_sock.c3
-rw-r--r--net/bluetooth/mgmt.c19
-rw-r--r--net/bluetooth/sco.c36
-rw-r--r--net/bluetooth/smp.c11
-rw-r--r--net/bridge/br_arp_nd_proxy.c18
-rw-r--r--net/bridge/br_mrp_netlink.c4
-rw-r--r--net/can/af_can.c4
-rw-r--r--net/can/af_can.h2
-rw-r--r--net/can/gw.c6
-rw-r--r--net/can/isotp.c24
-rw-r--r--net/can/proc.c3
-rw-r--r--net/core/dev.c33
-rw-r--r--net/core/rtnetlink.c28
-rw-r--r--net/core/skmsg.c13
-rw-r--r--net/hsr/hsr_device.c32
-rw-r--r--net/hsr/hsr_framereg.c38
-rw-r--r--net/ipv4/esp4.c9
-rw-r--r--net/ipv4/inet_connection_sock.c20
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/addrconf.c10
-rw-r--r--net/ipv6/datagram.c10
-rw-r--r--net/ipv6/esp6.c9
-rw-r--r--net/ipv6/icmp.c3
-rw-r--r--net/ipv6/ioam6.c4
-rw-r--r--net/ipv6/ip6_fib.c29
-rw-r--r--net/ipv6/ip6_flowlabel.c5
-rw-r--r--net/ipv6/ip6_tunnel.c5
-rw-r--r--net/ipv6/ndisc.c3
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c4
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/key/af_key.c19
-rw-r--r--net/mpls/af_mpls.c29
-rw-r--r--net/mptcp/protocol.c11
-rw-r--r--net/netfilter/ipset/ip_set_core.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h2
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c4
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c8
-rw-r--r--net/netfilter/nf_conntrack_ecache.c2
-rw-r--r--net/netfilter/nf_conntrack_expect.c39
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c12
-rw-r--r--net/netfilter/nf_conntrack_helper.c13
-rw-r--r--net/netfilter/nf_conntrack_netlink.c131
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c10
-rw-r--r--net/netfilter/nf_conntrack_sip.c18
-rw-r--r--net/netfilter/nf_flow_table_offload.c196
-rw-r--r--net/netfilter/nf_tables_api.c7
-rw-r--r--net/netfilter/nfnetlink_log.c10
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c20
-rw-r--r--net/netfilter/nft_set_rbtree.c92
-rw-r--r--net/netfilter/x_tables.c23
-rw-r--r--net/netfilter/xt_cgroup.c6
-rw-r--r--net/netfilter/xt_rateest.c5
-rw-r--r--net/nfc/nci/core.c10
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/vport-netdev.c11
-rw-r--r--net/packet/af_packet.c1
-rw-r--r--net/qrtr/af_qrtr.c31
-rw-r--r--net/rds/ib_rdma.c7
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/cls_flow.c10
-rw-r--r--net/sched/cls_fw.c14
-rw-r--r--net/sched/sch_hfsc.c4
-rw-r--r--net/sched/sch_netem.c5
-rw-r--r--net/smc/smc_rx.c9
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/vmw_vsock/af_vsock.c1
-rw-r--r--net/x25/x25_in.c9
-rw-r--r--net/x25/x25_subr.c1
-rw-r--r--net/xfrm/xfrm_input.c5
-rw-r--r--net/xfrm/xfrm_iptfs.c17
-rw-r--r--net/xfrm/xfrm_nat_keepalive.c2
-rw-r--r--net/xfrm/xfrm_policy.c12
-rw-r--r--net/xfrm/xfrm_state.c116
-rw-r--r--net/xfrm/xfrm_user.c32
-rw-r--r--rust/kernel/regulator.rs33
-rw-r--r--samples/landlock/sandboxer.c5
-rw-r--r--scripts/coccinelle/api/kmalloc_objs.cocci11
-rwxr-xr-xscripts/kconfig/merge_config.sh24
-rw-r--r--security/landlock/domain.c3
-rw-r--r--security/landlock/ruleset.c9
-rw-r--r--security/landlock/tsync.c92
-rw-r--r--security/security.c1
-rw-r--r--sound/firewire/amdtp-stream.c2
-rw-r--r--sound/hda/codecs/realtek/alc269.c94
-rw-r--r--sound/hda/controllers/intel.c14
-rw-r--r--sound/pci/asihpi/hpimsgx.c6
-rw-r--r--sound/pci/ctxfi/ctatc.c10
-rw-r--r--sound/pci/ctxfi/ctdaio.c86
-rw-r--r--sound/soc/amd/acp-config.c15
-rw-r--r--sound/soc/amd/acp/acp-sdw-legacy-mach.c8
-rw-r--r--sound/soc/amd/acp/amd-acp70-acpi-match.c61
-rw-r--r--sound/soc/amd/ps/pci-ps.c2
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c28
-rw-r--r--sound/soc/cirrus/ep93xx-i2s.c34
-rw-r--r--sound/soc/codecs/adau1372.c34
-rw-r--r--sound/soc/codecs/sma1307.c6
-rw-r--r--sound/soc/codecs/tas2781-fmwlib.c3
-rw-r--r--sound/soc/codecs/wcd934x.c2
-rw-r--r--sound/soc/fsl/imx-card.c2
-rw-r--r--sound/soc/generic/simple-card-utils.c4
-rw-r--r--sound/soc/intel/boards/Kconfig2
-rw-r--r--sound/soc/intel/boards/ehl_rt5660.c2
-rw-r--r--sound/soc/intel/catpt/device.c10
-rw-r--r--sound/soc/intel/catpt/dsp.c3
-rw-r--r--sound/soc/sdca/sdca_functions.c14
-rw-r--r--sound/soc/soc-core.c1
-rw-r--r--sound/soc/sof/ipc4-topology.c2
-rw-r--r--sound/soc/sof/topology.c2
-rw-r--r--sound/usb/Kconfig1
-rw-r--r--sound/usb/caiaq/device.c2
-rw-r--r--sound/usb/qcom/qc_audio_offload.c2
-rw-r--r--sound/usb/quirks.c8
-rw-r--r--tools/arch/x86/include/asm/msr-index.h5
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--tools/include/linux/build_bug.h4
-rw-r--r--tools/include/uapi/linux/kvm.h8
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/util/kvm-stat-arch/kvm-stat-x86.c6
-rw-r--r--tools/perf/util/metricgroup.c6
-rw-r--r--tools/perf/util/parse-events.c82
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c341
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c15
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h2
-rw-r--r--tools/testing/selftests/cgroup/test_core.c3
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c7
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/team/config2
-rwxr-xr-xtools/testing/selftests/drivers/net/team/non_ether_header_ops.sh41
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm1
-rw-r--r--tools/testing/selftests/kvm/s390/irq_routing.c75
-rw-r--r--tools/testing/selftests/landlock/tsync_test.c91
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c2
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh61
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh70
-rw-r--r--tools/testing/selftests/sched_ext/Makefile1
-rw-r--r--tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c68
-rw-r--r--tools/testing/selftests/sched_ext/cyclic_kick_wait.c194
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/filter.json44
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json25
-rw-r--r--tools/tracing/rtla/src/timerlat_bpf.h1
553 files changed, 7843 insertions, 2932 deletions
diff --git a/.mailmap b/.mailmap
index 40b4db2b2d60..2d04aeba68b4 100644
--- a/.mailmap
+++ b/.mailmap
@@ -316,6 +316,7 @@ Hans Verkuil <hverkuil@kernel.org> <hverkuil-cisco@xs4all.nl>
Hans Verkuil <hverkuil@kernel.org> <hansverk@cisco.com>
Hao Ge <hao.ge@linux.dev> <gehao@kylinos.cn>
Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com>
+Harry Yoo <harry@kernel.org> <harry.yoo@oracle.com>
Heiko Carstens <hca@linux.ibm.com> <h.carstens@de.ibm.com>
Heiko Carstens <hca@linux.ibm.com> <heiko.carstens@de.ibm.com>
Heiko Stuebner <heiko@sntech.de> <heiko.stuebner@bqreaders.com>
@@ -587,6 +588,7 @@ Morten Welinder <terra@gnome.org>
Morten Welinder <welinder@anemone.rentec.com>
Morten Welinder <welinder@darter.rentec.com>
Morten Welinder <welinder@troll.com>
+Muhammad Usama Anjum <usama.anjum@arm.com> <usama.anjum@collabora.com>
Mukesh Ojha <quic_mojha@quicinc.com> <mojha@codeaurora.org>
Muna Sinada <quic_msinada@quicinc.com> <msinada@codeaurora.org>
Murali Nalajala <quic_mnalajal@quicinc.com> <mnalajal@codeaurora.org>
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst
index 3210c4792978..90fdfddd3ae5 100644
--- a/Documentation/PCI/pcieaer-howto.rst
+++ b/Documentation/PCI/pcieaer-howto.rst
@@ -85,6 +85,16 @@ In the example, 'Requester ID' means the ID of the device that sent
the error message to the Root Port. Please refer to PCIe specs for other
fields.
+The 'TLP Header' is the prefix/header of the TLP that caused the error
+in raw hex format. To decode the TLP Header into human-readable form
+one may use tlp-tool:
+
+https://github.com/mmpg-x86/tlp-tool
+
+Example usage::
+
+ curl -L https://git.kernel.org/linus/2ca1c94ce0b6 | rtlp-tool --aer
+
AER Ratelimits
--------------
diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
index 1d7bfad73b1c..123c8468d58f 100644
--- a/Documentation/core-api/dma-attributes.rst
+++ b/Documentation/core-api/dma-attributes.rst
@@ -149,11 +149,33 @@ For architectures that require cache flushing for DMA coherence
DMA_ATTR_MMIO will not perform any cache flushing. The address
provided must never be mapped cacheable into the CPU.
-DMA_ATTR_CPU_CACHE_CLEAN
-------------------------
-
-This attribute indicates the CPU will not dirty any cacheline overlapping this
-DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows
-multiple small buffers to safely share a cacheline without risk of data
-corruption, suppressing DMA debug warnings about overlapping mappings.
-All mappings sharing a cacheline should have this attribute.
+DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
+------------------------------------
+
+This attribute indicates that CPU cache lines may overlap for buffers mapped
+with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
+
+Such overlap may occur when callers map multiple small buffers that reside
+within the same cache line. In this case, callers must guarantee that the CPU
+will not dirty these cache lines after the mappings are established. When this
+condition is met, multiple buffers can safely share a cache line without risking
+data corruption.
+
+All mappings that share a cache line must set this attribute to suppress DMA
+debug warnings about overlapping mappings.
+
+DMA_ATTR_REQUIRE_COHERENT
+-------------------------
+
+DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT fail on any
+system where SWIOTLB or cache management is required. This should only
+be used to support uAPI designs that require continuous HW DMA
+coherence with userspace processes, for example RDMA and DRM. At a
+minimum the memory being mapped must be userspace memory from
+pin_user_pages() or similar.
+
+Drivers should consider using dma_mmap_pages() instead of this
+interface when building their uAPIs, when possible.
+
+It must never be used in an in-kernel driver that only works with
+kernel memory.
diff --git a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
index b90eec2077b4..fe1272e86467 100644
--- a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
+++ b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
@@ -66,7 +66,7 @@ then:
required:
- refresh-rate-hz
-additionalProperties: false
+unevaluatedProperties: false
examples:
- |
diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml
index 56c755c22945..502907dd28b3 100644
--- a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml
+++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml
@@ -33,6 +33,7 @@ properties:
- const: rockchip,rk3066-spdif
- items:
- enum:
+ - rockchip,rk3576-spdif
- rockchip,rk3588-spdif
- const: rockchip,rk3568-spdif
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
index 4a7129d0b157..551edf39e766 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
+++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
@@ -164,7 +164,7 @@ allOf:
properties:
compatible:
contains:
- const: st,stm32mph7-sai
+ const: st,stm32h7-sai
then:
properties:
clocks:
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index af5a69f87da4..eb846518e6ac 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -783,6 +783,56 @@ controlled by the "uuid" mount option, which supports these values:
mounted with "uuid=on".
+Durability and copy up
+----------------------
+
+The fsync(2) system call ensures that the data and metadata of a file
+are safely written to the backing storage, which is expected to
+guarantee the existence of the information post system crash.
+
+Without an fsync(2) call, there is no guarantee that the observed
+data after a system crash will be either the old or the new data, but
+in practice, the observed data after crash is often the old or new data
+or a mix of both.
+
+When an overlayfs file is modified for the first time, copy up will
+create a copy of the lower file and its parent directories in the upper
+layer. Since the Linux filesystem API does not enforce any particular
+ordering on storing changes without explicit fsync(2) calls, in case
+of a system crash, the upper file could end up with no data at all
+(i.e. zeros), which would be an unusual outcome. To avoid this
+experience, overlayfs calls fsync(2) on the upper file before completing
+data copy up with rename(2) or link(2) to make the copy up "atomic".
+
+By default, overlayfs does not explicitly call fsync(2) on copied up
+directories or on metadata-only copy up, so it provides no guarantee to
+persist the user's modification unless the user calls fsync(2).
+The fsync during copy up only guarantees that if a copy up is observed
+after a crash, the observed data is not zeroes or intermediate values
+from the copy up staging area.
+
+On traditional local filesystems with a single journal (e.g. ext4, xfs),
+fsync on a file also persists the parent directory changes, because they
+are usually modified in the same transaction, so metadata durability during
+data copy up effectively comes for free. Overlayfs further limits risk by
+disallowing network filesystems as upper layer.
+
+Overlayfs can be tuned to prefer performance or durability when storing
+to the underlying upper layer. This is controlled by the "fsync" mount
+option, which supports these values:
+
+- "auto": (default)
+ Call fsync(2) on upper file before completion of data copy up.
+ No explicit fsync(2) on directory or metadata-only copy up.
+- "strict":
+ Call fsync(2) on upper file and directories before completion of any
+ copy up.
+- "volatile": [*]
+ Prefer performance over durability (see `Volatile mount`_)
+
+[*] The mount option "volatile" is an alias to "fsync=volatile".
+
+
Volatile mount
--------------
diff --git a/Documentation/hwmon/adm1177.rst b/Documentation/hwmon/adm1177.rst
index 1c85a2af92bf..375f6d6e03a7 100644
--- a/Documentation/hwmon/adm1177.rst
+++ b/Documentation/hwmon/adm1177.rst
@@ -27,10 +27,10 @@ for details.
Sysfs entries
-------------
-The following attributes are supported. Current maxim attribute
+The following attributes are supported. Current maximum attribute
is read-write, all other attributes are read-only.
-in0_input Measured voltage in microvolts.
+in0_input Measured voltage in millivolts.
-curr1_input Measured current in microamperes.
-curr1_max_alarm Overcurrent alarm in microamperes.
+curr1_input Measured current in milliamperes.
+curr1_max Overcurrent shutdown threshold in milliamperes.
diff --git a/Documentation/hwmon/peci-cputemp.rst b/Documentation/hwmon/peci-cputemp.rst
index fe0422248dc5..266b62a46f49 100644
--- a/Documentation/hwmon/peci-cputemp.rst
+++ b/Documentation/hwmon/peci-cputemp.rst
@@ -51,8 +51,9 @@ temp1_max Provides thermal control temperature of the CPU package
temp1_crit Provides shutdown temperature of the CPU package which
is also known as the maximum processor junction
temperature, Tjmax or Tprochot.
-temp1_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of
- the CPU package.
+temp1_crit_hyst Provides the hysteresis temperature of the CPU
+ package. Returns Tcontrol, the temperature at which
+ the critical condition clears.
temp2_label "DTS"
temp2_input Provides current temperature of the CPU package scaled
@@ -62,8 +63,9 @@ temp2_max Provides thermal control temperature of the CPU package
temp2_crit Provides shutdown temperature of the CPU package which
is also known as the maximum processor junction
temperature, Tjmax or Tprochot.
-temp2_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of
- the CPU package.
+temp2_crit_hyst Provides the hysteresis temperature of the CPU
+ package. Returns Tcontrol, the temperature at which
+ the critical condition clears.
temp3_label "Tcontrol"
temp3_input Provides current Tcontrol temperature of the CPU
diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
index 13134bccdd39..7f86d7a37dc2 100644
--- a/Documentation/userspace-api/landlock.rst
+++ b/Documentation/userspace-api/landlock.rst
@@ -8,7 +8,7 @@ Landlock: unprivileged access control
=====================================
:Author: Mickaël Salaün
-:Date: January 2026
+:Date: March 2026
The goal of Landlock is to enable restriction of ambient rights (e.g. global
filesystem or network access) for a set of processes. Because Landlock
@@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags
.. code-block:: c
- __u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
- if (abi < 7) {
- /* Clear logging flags unsupported before ABI 7. */
+ __u32 restrict_flags =
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+ LANDLOCK_RESTRICT_SELF_TSYNC;
+ switch (abi) {
+ case 1 ... 6:
+ /* Removes logging flags for ABI < 7 */
restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
+ __attribute__((fallthrough));
+ case 7:
+ /*
+ * Removes multithreaded enforcement flag for ABI < 8
+ *
+ * WARNING: Without this flag, calling landlock_restrict_self(2) is
+ * only equivalent if the calling process is single-threaded. Below
+ * ABI v8 (and as of ABI v8, when not using this flag), a Landlock
+ * policy would only be enforced for the calling thread and its
+ * children (and not for all threads, including parents and siblings).
+ */
+ restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC;
}
The next step is to restrict the current thread from gaining more privileges
diff --git a/MAINTAINERS b/MAINTAINERS
index 980ba6ff3011..2f645897148c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3986,7 +3986,7 @@ F: drivers/hwmon/asus-ec-sensors.c
ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS
M: Corentin Chary <corentin.chary@gmail.com>
M: Luke D. Jones <luke@ljones.dev>
-M: Denis Benato <benato.denis96@gmail.com>
+M: Denis Benato <denis.benato@linux.dev>
L: platform-driver-x86@vger.kernel.org
S: Maintained
W: https://asus-linux.org/
@@ -8625,8 +8625,14 @@ F: drivers/gpu/drm/lima/
F: include/uapi/drm/lima_drm.h
DRM DRIVERS FOR LOONGSON
+M: Jianmin Lv <lvjianmin@loongson.cn>
+M: Qianhai Wu <wuqianhai@loongson.cn>
+R: Huacai Chen <chenhuacai@kernel.org>
+R: Mingcong Bai <jeffbai@aosc.io>
+R: Xi Ruoyao <xry111@xry111.site>
+R: Icenowy Zheng <zhengxingda@iscas.ac.cn>
L: dri-devel@lists.freedesktop.org
-S: Orphan
+S: Maintained
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: drivers/gpu/drm/loongson/
@@ -9610,7 +9616,12 @@ F: include/linux/ext2*
EXT4 FILE SYSTEM
M: "Theodore Ts'o" <tytso@mit.edu>
-M: Andreas Dilger <adilger.kernel@dilger.ca>
+R: Andreas Dilger <adilger.kernel@dilger.ca>
+R: Baokun Li <libaokun@linux.alibaba.com>
+R: Jan Kara <jack@suse.cz>
+R: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+R: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+R: Zhang Yi <yi.zhang@huawei.com>
L: linux-ext4@vger.kernel.org
S: Maintained
W: http://ext4.wiki.kernel.org
@@ -12006,7 +12017,6 @@ I2C SUBSYSTEM
M: Wolfram Sang <wsa+renesas@sang-engineering.com>
L: linux-i2c@vger.kernel.org
S: Maintained
-W: https://i2c.wiki.kernel.org/
Q: https://patchwork.ozlabs.org/project/linux-i2c/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
F: Documentation/i2c/
@@ -12032,7 +12042,6 @@ I2C SUBSYSTEM HOST DRIVERS
M: Andi Shyti <andi.shyti@kernel.org>
L: linux-i2c@vger.kernel.org
S: Maintained
-W: https://i2c.wiki.kernel.org/
Q: https://patchwork.ozlabs.org/project/linux-i2c/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git
F: Documentation/devicetree/bindings/i2c/
@@ -16874,7 +16883,7 @@ M: Lorenzo Stoakes <ljs@kernel.org>
R: Rik van Riel <riel@surriel.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@kernel.org>
-R: Harry Yoo <harry.yoo@oracle.com>
+R: Harry Yoo <harry@kernel.org>
R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
S: Maintained
@@ -24340,7 +24349,7 @@ F: drivers/nvmem/layouts/sl28vpd.c
SLAB ALLOCATOR
M: Vlastimil Babka <vbabka@kernel.org>
-M: Harry Yoo <harry.yoo@oracle.com>
+M: Harry Yoo <harry@kernel.org>
M: Andrew Morton <akpm@linux-foundation.org>
R: Hao Li <hao.li@linux.dev>
R: Christoph Lameter <cl@gentwo.org>
diff --git a/Makefile b/Makefile
index 2294decf0afc..6b1d9fb1a6b4 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 7
PATCHLEVEL = 0
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
NAME = Baby Opossum Posse
# *DOCUMENTATION*
@@ -1654,7 +1654,7 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \
modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \
compile_commands.json rust/test \
rust-project.json .vmlinux.objs .vmlinux.export.c \
- .builtin-dtbs-list .builtin-dtb.S
+ .builtin-dtbs-list .builtin-dtbs.S
# Directories & files removed with 'make mrproper'
MRPROPER_FILES += include/config include/generated \
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index c5c5644b1878..a024d9a770dc 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1753,7 +1753,7 @@ int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
if (!writable)
return -EPERM;
- ptep = (u64 __user *)hva + offset;
+ ptep = (void __user *)hva + offset;
if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
r = __lse_swap_desc(ptep, old, new);
else
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 959532422d3a..b963fd975aac 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -247,6 +247,20 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_vcpu_set_be(vcpu);
*vcpu_pc(vcpu) = target_pc;
+
+ /*
+ * We may come from a state where either a PC update was
+ * pending (SMC call resulting in PC being increpented to
+ * skip the SMC) or a pending exception. Make sure we get
+ * rid of all that, as this cannot be valid out of reset.
+ *
+ * Note that clearing the exception mask also clears PC
+ * updates, but that's an implementation detail, and we
+ * really want to make it explicit.
+ */
+ vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
+ vcpu_clear_flag(vcpu, EXCEPT_MASK);
+ vcpu_clear_flag(vcpu, INCREMENT_PC);
vcpu_set_reg(vcpu, 0, reset_state.r0);
}
diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h
index e2eca1a25b4e..a1bd6a3ee03a 100644
--- a/arch/loongarch/include/asm/linkage.h
+++ b/arch/loongarch/include/asm/linkage.h
@@ -41,4 +41,40 @@
.cfi_endproc; \
SYM_END(name, SYM_T_NONE)
+/*
+ * This is for the signal handler trampoline, which is used as the return
+ * address of the signal handlers in userspace instead of called normally.
+ * The long standing libgcc bug https://gcc.gnu.org/PR124050 requires a
+ * nop between .cfi_startproc and the actual address of the trampoline, so
+ * we cannot simply use SYM_FUNC_START.
+ *
+ * This wrapper also contains all the .cfi_* directives for recovering
+ * the content of the GPRs and the "return address" (where the rt_sigreturn
+ * syscall will jump to), assuming there is a struct rt_sigframe (where
+ * a struct sigcontext containing those information we need to recover) at
+ * $sp. The "DWARF for the LoongArch(TM) Architecture" manual states
+ * column 0 is for $zero, but it does not make too much sense to
+ * save/restore the hardware zero register. Repurpose this column here
+ * for the return address (here it's not the content of $ra we cannot use
+ * the default column 3).
+ */
+#define SYM_SIGFUNC_START(name) \
+ .cfi_startproc; \
+ .cfi_signal_frame; \
+ .cfi_def_cfa 3, RT_SIGFRAME_SC; \
+ .cfi_return_column 0; \
+ .cfi_offset 0, SC_PC; \
+ \
+ .irp num, 1, 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 11, 12, 13, 14, 15, 16, \
+ 17, 18, 19, 20, 21, 22, 23, 24, \
+ 25, 26, 27, 28, 29, 30, 31; \
+ .cfi_offset \num, SC_REGS + \num * SZREG; \
+ .endr; \
+ \
+ nop; \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+
+#define SYM_SIGFUNC_END(name) SYM_FUNC_END(name)
+
#endif
diff --git a/arch/loongarch/include/asm/sigframe.h b/arch/loongarch/include/asm/sigframe.h
new file mode 100644
index 000000000000..109298b8d7e0
--- /dev/null
+++ b/arch/loongarch/include/asm/sigframe.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <asm/siginfo.h>
+#include <asm/ucontext.h>
+
+struct rt_sigframe {
+ struct siginfo rs_info;
+ struct ucontext rs_uctx;
+};
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index 3017c7157600..2cc953f113ac 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -16,6 +16,7 @@
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/ftrace.h>
+#include <asm/sigframe.h>
#include <vdso/datapage.h>
static void __used output_ptreg_defines(void)
@@ -220,6 +221,7 @@ static void __used output_sc_defines(void)
COMMENT("Linux sigcontext offsets.");
OFFSET(SC_REGS, sigcontext, sc_regs);
OFFSET(SC_PC, sigcontext, sc_pc);
+ OFFSET(RT_SIGFRAME_SC, rt_sigframe, rs_uctx.uc_mcontext);
BLANK();
}
diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c
index 841206fde3ab..652456768b55 100644
--- a/arch/loongarch/kernel/env.c
+++ b/arch/loongarch/kernel/env.c
@@ -42,16 +42,15 @@ static int __init init_cpu_fullname(void)
int cpu, ret;
char *cpuname;
const char *model;
- struct device_node *root;
/* Parsing cpuname from DTS model property */
- root = of_find_node_by_path("/");
- ret = of_property_read_string(root, "model", &model);
+ ret = of_property_read_string(of_root, "model", &model);
if (ret == 0) {
cpuname = kstrdup(model, GFP_KERNEL);
+ if (!cpuname)
+ return -ENOMEM;
loongson_sysconf.cpuname = strsep(&cpuname, " ");
}
- of_node_put(root);
if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) {
for (cpu = 0; cpu < NR_CPUS; cpu++)
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c
index c9f7ca778364..d4151d2fb82e 100644
--- a/arch/loongarch/kernel/signal.c
+++ b/arch/loongarch/kernel/signal.c
@@ -35,6 +35,7 @@
#include <asm/cpu-features.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
+#include <asm/sigframe.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
@@ -51,11 +52,6 @@
#define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); })
#define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); })
-struct rt_sigframe {
- struct siginfo rs_info;
- struct ucontext rs_uctx;
-};
-
struct _ctx_layout {
struct sctx_info *addr;
unsigned int size;
diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c
index d2acb4d09e73..003bd773e11c 100644
--- a/arch/loongarch/kvm/intc/eiointc.c
+++ b/arch/loongarch/kvm/intc/eiointc.c
@@ -83,7 +83,7 @@ static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s,
if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) {
cpuid = ffs(cpuid) - 1;
- cpuid = (cpuid >= 4) ? 0 : cpuid;
+ cpuid = ((cpuid < 0) || (cpuid >= 4)) ? 0 : cpuid;
}
vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid);
@@ -472,34 +472,34 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev,
switch (addr) {
case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
offset = (addr - EIOINTC_NODETYPE_START) / 4;
- p = s->nodetype + offset * 4;
+ p = (void *)s->nodetype + offset * 4;
break;
case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
offset = (addr - EIOINTC_IPMAP_START) / 4;
- p = &s->ipmap + offset * 4;
+ p = (void *)&s->ipmap + offset * 4;
break;
case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
offset = (addr - EIOINTC_ENABLE_START) / 4;
- p = s->enable + offset * 4;
+ p = (void *)s->enable + offset * 4;
break;
case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
offset = (addr - EIOINTC_BOUNCE_START) / 4;
- p = s->bounce + offset * 4;
+ p = (void *)s->bounce + offset * 4;
break;
case EIOINTC_ISR_START ... EIOINTC_ISR_END:
offset = (addr - EIOINTC_ISR_START) / 4;
- p = s->isr + offset * 4;
+ p = (void *)s->isr + offset * 4;
break;
case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
if (cpu >= s->num_cpu)
return -EINVAL;
offset = (addr - EIOINTC_COREISR_START) / 4;
- p = s->coreisr[cpu] + offset * 4;
+ p = (void *)s->coreisr[cpu] + offset * 4;
break;
case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
offset = (addr - EIOINTC_COREMAP_START) / 4;
- p = s->coremap + offset * 4;
+ p = (void *)s->coremap + offset * 4;
break;
default:
kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr);
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 8ffd50a470e6..831f381a8fd1 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -588,6 +588,9 @@ struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
{
struct kvm_phyid_map *map;
+ if (cpuid < 0)
+ return NULL;
+
if (cpuid >= KVM_MAX_PHYID)
return NULL;
diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c
index d923295ab8c6..d233ea2218fe 100644
--- a/arch/loongarch/pci/pci.c
+++ b/arch/loongarch/pci/pci.c
@@ -5,9 +5,11 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/acpi.h>
+#include <linux/delay.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/vgaarb.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/cacheflush.h>
#include <asm/loongson.h>
@@ -15,6 +17,9 @@
#define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06
#define PCI_DEVICE_ID_LOONGSON_DC2 0x7a36
#define PCI_DEVICE_ID_LOONGSON_DC3 0x7a46
+#define PCI_DEVICE_ID_LOONGSON_GPU1 0x7a15
+#define PCI_DEVICE_ID_LOONGSON_GPU2 0x7a25
+#define PCI_DEVICE_ID_LOONGSON_GPU3 0x7a35
int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val)
@@ -99,3 +104,78 @@ static void pci_fixup_vgadev(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1, pci_fixup_vgadev);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2, pci_fixup_vgadev);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC3, pci_fixup_vgadev);
+
+#define CRTC_NUM_MAX 2
+#define CRTC_OUTPUT_ENABLE 0x100
+
+static void loongson_gpu_fixup_dma_hang(struct pci_dev *pdev, bool on)
+{
+ u32 i, val, count, crtc_offset, device;
+ void __iomem *crtc_reg, *base, *regbase;
+ static u32 crtc_status[CRTC_NUM_MAX] = { 0 };
+
+ base = pdev->bus->ops->map_bus(pdev->bus, pdev->devfn + 1, 0);
+ device = readw(base + PCI_DEVICE_ID);
+
+ regbase = ioremap(readq(base + PCI_BASE_ADDRESS_0) & ~0xffull, SZ_64K);
+ if (!regbase) {
+ pci_err(pdev, "Failed to ioremap()\n");
+ return;
+ }
+
+ switch (device) {
+ case PCI_DEVICE_ID_LOONGSON_DC2:
+ crtc_reg = regbase + 0x1240;
+ crtc_offset = 0x10;
+ break;
+ case PCI_DEVICE_ID_LOONGSON_DC3:
+ crtc_reg = regbase;
+ crtc_offset = 0x400;
+ break;
+ }
+
+ for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) {
+ val = readl(crtc_reg);
+
+ if (!on)
+ crtc_status[i] = val;
+
+ /* No need to fixup if the status is off at startup. */
+ if (!(crtc_status[i] & CRTC_OUTPUT_ENABLE))
+ continue;
+
+ if (on)
+ val |= CRTC_OUTPUT_ENABLE;
+ else
+ val &= ~CRTC_OUTPUT_ENABLE;
+
+ mb();
+ writel(val, crtc_reg);
+
+ for (count = 0; count < 40; count++) {
+ val = readl(crtc_reg) & CRTC_OUTPUT_ENABLE;
+ if ((on && val) || (!on && !val))
+ break;
+ udelay(1000);
+ }
+
+ pci_info(pdev, "DMA hang fixup at reg[0x%lx]: 0x%x\n",
+ (unsigned long)crtc_reg & 0xffff, readl(crtc_reg));
+ }
+
+ iounmap(regbase);
+}
+
+static void pci_fixup_dma_hang_early(struct pci_dev *pdev)
+{
+ loongson_gpu_fixup_dma_hang(pdev, false);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_early);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_early);
+
+static void pci_fixup_dma_hang_final(struct pci_dev *pdev)
+{
+ loongson_gpu_fixup_dma_hang(pdev, true);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_final);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_final);
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index 520f1513f07d..294c16b9517f 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -26,7 +26,7 @@ cflags-vdso := $(ccflags-vdso) \
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
-std=gnu11 -fms-extensions -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \
-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
- $(call cc-option, -fno-asynchronous-unwind-tables) \
+ $(call cc-option, -fasynchronous-unwind-tables) \
$(call cc-option, -fno-stack-protector)
aflags-vdso := $(ccflags-vdso) \
-D__ASSEMBLY__ -Wa,-gdwarf-2
@@ -41,7 +41,7 @@ endif
# VDSO linker flags.
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
- $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id -T
+ $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id --eh-frame-hdr -T
#
# Shared build commands.
diff --git a/arch/loongarch/vdso/sigreturn.S b/arch/loongarch/vdso/sigreturn.S
index 9cb3c58fad03..59f940d928de 100644
--- a/arch/loongarch/vdso/sigreturn.S
+++ b/arch/loongarch/vdso/sigreturn.S
@@ -12,13 +12,13 @@
#include <asm/regdef.h>
#include <asm/asm.h>
+#include <asm/asm-offsets.h>
.section .text
- .cfi_sections .debug_frame
-SYM_FUNC_START(__vdso_rt_sigreturn)
+SYM_SIGFUNC_START(__vdso_rt_sigreturn)
li.w a7, __NR_rt_sigreturn
syscall 0
-SYM_FUNC_END(__vdso_rt_sigreturn)
+SYM_SIGFUNC_END(__vdso_rt_sigreturn)
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index f3184073e754..dad02f5b3c8d 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -62,8 +62,8 @@ do { \
* @size: number of elements in array
*/
#define array_index_mask_nospec array_index_mask_nospec
-static inline unsigned long array_index_mask_nospec(unsigned long index,
- unsigned long size)
+static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
{
unsigned long mask;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 64a50f0862aa..3039c88daa63 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -710,6 +710,9 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
+#define SIE64_RETURN_NORMAL 0
+#define SIE64_RETURN_MCCK 1
+
int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa,
unsigned long gasce);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index c9ae680a28af..ac3606c3babe 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -62,7 +62,7 @@ struct stack_frame {
struct {
unsigned long sie_control_block;
unsigned long sie_savearea;
- unsigned long sie_reason;
+ unsigned long sie_return;
unsigned long sie_flags;
unsigned long sie_control_block_phys;
unsigned long sie_guest_asce;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e1a5b5b54e4f..fbd26f3e9f96 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -63,7 +63,7 @@ int main(void)
OFFSET(__SF_EMPTY, stack_frame, empty[0]);
OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block);
OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
- OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
+ OFFSET(__SF_SIE_RETURN, stack_frame, sie_return);
OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 4873fe9d891b..bb806d1ddae0 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -200,7 +200,7 @@ SYM_FUNC_START(__sie64a)
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
- xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
+ xc __SF_SIE_RETURN(8,%r15),__SF_SIE_RETURN(%r15) # return code = 0
mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
lmg %r0,%r13,0(%r4) # load guest gprs 0-13
mvi __TI_sie(%r14),1
@@ -237,7 +237,7 @@ SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
xgr %r4,%r4
xgr %r5,%r5
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
+ lg %r2,__SF_SIE_RETURN(%r15) # return sie return code
BR_EX %r14
SYM_FUNC_END(__sie64a)
EXPORT_SYMBOL(__sie64a)
@@ -271,6 +271,7 @@ SYM_CODE_START(system_call)
xgr %r9,%r9
xgr %r10,%r10
xgr %r11,%r11
+ xgr %r12,%r12
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
MBEAR %r2,%r13
@@ -407,6 +408,7 @@ SYM_CODE_START(\name)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
+ xgr %r12,%r12
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
MBEAR %r11,%r13
@@ -496,6 +498,7 @@ SYM_CODE_START(mcck_int_handler)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
+ xgr %r12,%r12
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index a55abbf65333..94fbfad49f62 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -487,8 +487,8 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
if (test_cpu_flag(CIF_MCCK_GUEST) &&
(mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
- /* Set exit reason code for host's later handling */
- *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
+ /* Set sie return code for host's later handling */
+ ((struct stack_frame *)regs->gprs[15])->sie_return = SIE64_RETURN_MCCK;
}
clear_cpu_flag(CIF_MCCK_GUEST);
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index 795b6cca74c9..d103c853e120 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -13,6 +13,7 @@
*/
#include <linux/cpufeature.h>
+#include <linux/nospec.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -131,8 +132,10 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
goto out;
regs->gprs[2] = -ENOSYS;
- if (likely(nr < NR_syscalls))
+ if (likely(nr < NR_syscalls)) {
+ nr = array_index_nospec(nr, NR_syscalls);
regs->gprs[2] = sys_call_table[nr](regs);
+ }
out:
syscall_exit_to_user_mode(regs);
}
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 670404d4fa44..7b8d70fe406d 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -135,32 +135,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt
}
/**
- * dat_crstep_xchg() - Exchange a gmap CRSTE with another.
- * @crstep: Pointer to the CRST entry
- * @new: Replacement entry.
- * @gfn: The affected guest address.
- * @asce: The ASCE of the address space.
- *
- * Context: This function is assumed to be called with kvm->mmu_lock held.
- */
-void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce)
-{
- if (crstep->h.i) {
- WRITE_ONCE(*crstep, new);
- return;
- } else if (cpu_has_edat2()) {
- crdte_crste(crstep, *crstep, new, gfn, asce);
- return;
- }
-
- if (machine_has_tlb_guest())
- idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL);
- else
- idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL);
- WRITE_ONCE(*crstep, new);
-}
-
-/**
* dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
* @crstep: Pointer to the CRST entry.
* @old: Expected old value.
@@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce
*
* Return: %true if the exchange was successful.
*/
-bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
- union asce asce)
+bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
+ gfn_t gfn, union asce asce)
{
if (old.h.i)
return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
@@ -292,6 +266,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
pt->ptes[i].val = init.val | i * PAGE_SIZE;
/* No need to take locks as the page table is not installed yet. */
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
+ pgste_init.vsie_notif = old.s.fc1.vsie_notif;
pgste_init.pcl = uses_skeys && init.h.i;
dat_init_pgstes(pt, pgste_init.val);
} else {
@@ -893,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d
/* This table entry needs to be updated. */
if (walk->start <= gfn && walk->end >= next) {
- dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
+ if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce))
+ return -EINVAL;
/* A lower level table was present, needs to be freed. */
if (!crste.h.fc && !crste.h.i) {
if (is_pmd(crste))
@@ -1021,67 +997,21 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
}
-int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
- bool uses_skeys, struct guest_fault *f)
-{
- union crste oldval, newval;
- union pte newpte, oldpte;
- union pgste pgste;
- int rc = 0;
-
- rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep);
- if (rc == -EINVAL || rc == -ENOMEM)
- return rc;
- if (rc)
- return -EAGAIN;
-
- if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level)))
- return -EINVAL;
-
- if (f->ptep) {
- pgste = pgste_get_lock(f->ptep);
- oldpte = *f->ptep;
- newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
- newpte.s.sd = oldpte.s.sd;
- oldpte.s.sd = 0;
- if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
- pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys);
- if (f->callback)
- f->callback(f);
- } else {
- rc = -EAGAIN;
- }
- pgste_set_unlock(f->ptep, pgste);
- } else {
- oldval = READ_ONCE(*f->crstep);
- newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
- f->write_attempt | oldval.s.fc1.d);
- newval.s.fc1.sd = oldval.s.fc1.sd;
- if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
- crste_origin_large(oldval) != crste_origin_large(newval))
- return -EAGAIN;
- if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce))
- return -EAGAIN;
- if (f->callback)
- f->callback(f);
- }
-
- return rc;
-}
-
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
- union crste crste = READ_ONCE(*crstep);
+ union crste newcrste, oldcrste;
int *n = walk->priv;
- if (!crste.h.fc || crste.h.i || crste.h.p)
- return 0;
-
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p)
+ return 0;
+ if (oldcrste.s.fc1.prefix_notif)
+ break;
+ newcrste = oldcrste;
+ newcrste.s.fc1.prefix_notif = 1;
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce));
*n = 2;
- if (crste.s.fc1.prefix_notif)
- return 0;
- crste.s.fc1.prefix_notif = 1;
- dat_crstep_xchg(crstep, crste, gfn, walk->asce);
return 0;
}
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 123e11dcd70d..874cc962e196 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -160,14 +160,14 @@ union pmd {
unsigned long :44; /* HW */
unsigned long : 3; /* Unused */
unsigned long : 1; /* HW */
+ unsigned long s : 1; /* Special */
unsigned long w : 1; /* Writable soft-bit */
unsigned long r : 1; /* Readable soft-bit */
unsigned long d : 1; /* Dirty */
unsigned long y : 1; /* Young */
- unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long : 3; /* HW */
+ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 1; /* Unused */
unsigned long : 4; /* HW */
unsigned long sd : 1; /* Soft-Dirty */
unsigned long pr : 1; /* Present */
@@ -183,14 +183,14 @@ union pud {
unsigned long :33; /* HW */
unsigned long :14; /* Unused */
unsigned long : 1; /* HW */
+ unsigned long s : 1; /* Special */
unsigned long w : 1; /* Writable soft-bit */
unsigned long r : 1; /* Readable soft-bit */
unsigned long d : 1; /* Dirty */
unsigned long y : 1; /* Young */
- unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long : 3; /* HW */
+ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 1; /* Unused */
unsigned long : 4; /* HW */
unsigned long sd : 1; /* Soft-Dirty */
unsigned long pr : 1; /* Present */
@@ -254,14 +254,14 @@ union crste {
struct {
unsigned long :47;
unsigned long : 1; /* HW (should be 0) */
+ unsigned long s : 1; /* Special */
unsigned long w : 1; /* Writable */
unsigned long r : 1; /* Readable */
unsigned long d : 1; /* Dirty */
unsigned long y : 1; /* Young */
- unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long : 3; /* HW */
+ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 1;
unsigned long : 4; /* HW */
unsigned long sd : 1; /* Soft-Dirty */
unsigned long pr : 1; /* Present */
@@ -540,8 +540,6 @@ int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gf
u16 type, u16 param);
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
-int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
- bool uses_skeys, struct guest_fault *f);
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
long dat_reset_cmma(union asce asce, gfn_t start_gfn);
@@ -938,11 +936,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu
return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
}
-static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce)
+static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
{
- union crste newcrste = _CRSTE_EMPTY(crstep->h.tt);
+ union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
- dat_crstep_xchg(crstep, newcrste, gfn, asce);
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
+ return oldcrste;
}
static inline int get_level(union crste *crstep, union pte *ptep)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4630b2a067ea..53a8550e7102 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1434,17 +1434,27 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
if (rc)
return rc;
- pgste = pgste_get_lock(ptep_h);
- newpte = _pte(f->pfn, f->writable, !p, 0);
- newpte.s.d |= ptep->s.d;
- newpte.s.sd |= ptep->s.sd;
- newpte.h.p &= ptep->h.p;
- pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
- pgste.vsie_notif = 1;
+ if (!pgste_get_trylock(ptep_h, &pgste))
+ return -EAGAIN;
+ newpte = _pte(f->pfn, f->writable, !p, ptep_h->s.s);
+ newpte.s.d |= ptep_h->s.d;
+ newpte.s.sd |= ptep_h->s.sd;
+ newpte.h.p &= ptep_h->h.p;
+ if (!newpte.h.p && !f->writable) {
+ rc = -EOPNOTSUPP;
+ } else {
+ pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
+ pgste.vsie_notif = 1;
+ }
pgste_set_unlock(ptep_h, pgste);
+ if (rc)
+ return rc;
+ if (!sg->parent)
+ return -EAGAIN;
newpte = _pte(f->pfn, 0, !p, 0);
- pgste = pgste_get_lock(ptep);
+ if (!pgste_get_trylock(ptep, &pgste))
+ return -EAGAIN;
pgste = __dat_ptep_xchg(ptep, pgste, newpte, gpa_to_gfn(raddr), sg->asce, uses_skeys(sg));
pgste_set_unlock(ptep, pgste);
@@ -1454,7 +1464,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
struct guest_fault *f, bool p)
{
- union crste newcrste;
+ union crste newcrste, oldcrste;
gfn_t gfn;
int rc;
@@ -1467,16 +1477,28 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
if (rc)
return rc;
- newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
- newcrste.s.fc1.d |= host->s.fc1.d;
- newcrste.s.fc1.sd |= host->s.fc1.sd;
- newcrste.h.p &= host->h.p;
- newcrste.s.fc1.vsie_notif = 1;
- newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
- _gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false);
+ do {
+ /* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */
+ if (!sg->parent)
+ return -EAGAIN;
+ oldcrste = READ_ONCE(*host);
+ newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
+ newcrste.s.fc1.d |= oldcrste.s.fc1.d;
+ newcrste.s.fc1.sd |= oldcrste.s.fc1.sd;
+ newcrste.h.p &= oldcrste.h.p;
+ newcrste.s.fc1.vsie_notif = 1;
+ newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
+ newcrste.s.fc1.s = oldcrste.s.fc1.s;
+ if (!newcrste.h.p && !f->writable)
+ return -EOPNOTSUPP;
+ } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
+ if (!sg->parent)
+ return -EAGAIN;
- newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p);
- dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce);
+ newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
+ gfn = gpa_to_gfn(raddr);
+ while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
+ ;
return 0;
}
@@ -1500,21 +1522,31 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
if (rc)
return rc;
- /* A race occourred. The shadow mapping is already valid, nothing to do */
- if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table)))
+ /* A race occurred. The shadow mapping is already valid, nothing to do */
+ if ((ptep && !ptep->h.i && ptep->h.p == w->p) ||
+ (!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p))
return 0;
gl = get_level(table, ptep);
+ /* In case of a real address space */
+ if (w->level <= LEVEL_MEM) {
+ l = TABLE_TYPE_PAGE_TABLE;
+ hl = TABLE_TYPE_REGION1;
+ goto real_address_space;
+ }
+
/*
* Skip levels that are already protected. For each level, protect
* only the page containing the entry, not the whole table.
*/
for (i = gl ; i >= w->level; i--) {
- rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr),
- entries[i - 1].pfn, i, entries[i - 1].writable);
+ rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr),
+ entries[i].pfn, i + 1, entries[i].writable);
if (rc)
return rc;
+ if (!sg->parent)
+ return -EAGAIN;
}
rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
@@ -1526,6 +1558,7 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
/* Get the smallest granularity */
l = min3(gl, hl, w->level);
+real_address_space:
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
/* If necessary, create the shadow mapping */
if (l < gl) {
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index ef0c6ebfdde2..645c32c767d2 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
struct clear_young_pte_priv *priv = walk->priv;
union crste crste, new;
- crste = READ_ONCE(*crstep);
+ do {
+ crste = READ_ONCE(*crstep);
+
+ if (!crste.h.fc)
+ return 0;
+ if (!crste.s.fc1.y && crste.h.i)
+ return 0;
+ if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end))
+ break;
- if (!crste.h.fc)
- return 0;
- if (!crste.s.fc1.y && crste.h.i)
- return 0;
- if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) {
new = crste;
new.h.i = 1;
new.s.fc1.y = 0;
@@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
folio_set_dirty(phys_to_folio(crste_origin_large(crste)));
new.s.fc1.d = 0;
new.h.p = 1;
- dat_crstep_xchg(crstep, new, gfn, walk->asce);
- }
+ } while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce));
+
priv->young = 1;
return 0;
}
@@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
{
struct gmap_unmap_priv *priv = walk->priv;
struct folio *folio = NULL;
+ union crste old = *crstep;
- if (crstep->h.fc) {
- if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
- folio = phys_to_folio(crste_origin_large(*crstep));
- gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn);
- if (folio)
- uv_convert_from_secure_folio(folio);
- }
+ if (!old.h.fc)
+ return 0;
+
+ if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
+ folio = phys_to_folio(crste_origin_large(old));
+ /* No races should happen because kvm->mmu_lock is held in write mode */
+ KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
+ priv->gmap->kvm);
+ if (folio)
+ uv_convert_from_secure_folio(folio);
return 0;
}
@@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t
if (fatal_signal_pending(current))
return 1;
- crste = READ_ONCE(*table);
- if (!crste.h.fc)
- return 0;
- if (crste.h.p && !crste.s.fc1.sd)
- return 0;
+ do {
+ crste = READ_ONCE(*table);
+ if (!crste.h.fc)
+ return 0;
+ if (crste.h.p && !crste.s.fc1.sd)
+ return 0;
- /*
- * If this large page contains one or more prefixes of vCPUs that are
- * currently running, do not reset the protection, leave it marked as
- * dirty.
- */
- if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) {
+ /*
+ * If this large page contains one or more prefixes of vCPUs that are
+ * currently running, do not reset the protection, leave it marked as
+ * dirty.
+ */
+ if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end))
+ break;
new = crste;
new.h.p = 1;
new.s.fc1.sd = 0;
- gmap_crstep_xchg(gmap, table, new, gfn);
- }
+ } while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn));
for ( ; gfn < end; gfn++)
mark_page_dirty(gmap->kvm, gfn);
@@ -511,7 +519,7 @@ void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end)
_dat_walk_gfn_range(start, end, gmap->asce, &walk_ops, 0, gmap);
}
-static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f)
+static int gmap_handle_minor_crste_fault(struct gmap *gmap, struct guest_fault *f)
{
union crste newcrste, oldcrste = READ_ONCE(*f->crstep);
@@ -536,10 +544,8 @@ static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f)
newcrste.s.fc1.d = 1;
newcrste.s.fc1.sd = 1;
}
- if (!oldcrste.s.fc1.d && newcrste.s.fc1.d)
- SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
/* In case of races, let the slow path deal with it. */
- return !dat_crstep_xchg_atomic(f->crstep, oldcrste, newcrste, f->gfn, asce);
+ return !gmap_crstep_xchg_atomic(gmap, f->crstep, oldcrste, newcrste, f->gfn);
}
/* Trying to write on a read-only page, let the slow path deal with it. */
return 1;
@@ -568,8 +574,6 @@ static int _gmap_handle_minor_pte_fault(struct gmap *gmap, union pgste *pgste,
newpte.s.d = 1;
newpte.s.sd = 1;
}
- if (!oldpte.s.d && newpte.s.d)
- SetPageDirty(pfn_to_page(newpte.h.pfra));
*pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, *pgste, f->gfn);
return 0;
@@ -606,7 +610,7 @@ int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault)
fault->callback(fault);
pgste_set_unlock(fault->ptep, pgste);
} else {
- rc = gmap_handle_minor_crste_fault(gmap->asce, fault);
+ rc = gmap_handle_minor_crste_fault(gmap, fault);
if (!rc && fault->callback)
fault->callback(fault);
}
@@ -623,10 +627,61 @@ static inline bool gmap_1m_allowed(struct gmap *gmap, gfn_t gfn)
return test_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &gmap->flags);
}
+static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int level,
+ struct guest_fault *f)
+{
+ union crste oldval, newval;
+ union pte newpte, oldpte;
+ union pgste pgste;
+ int rc = 0;
+
+ rc = dat_entry_walk(mc, f->gfn, gmap->asce, DAT_WALK_ALLOC_CONTINUE, level,
+ &f->crstep, &f->ptep);
+ if (rc == -ENOMEM)
+ return rc;
+ if (KVM_BUG_ON(rc == -EINVAL, gmap->kvm))
+ return rc;
+ if (rc)
+ return -EAGAIN;
+ if (KVM_BUG_ON(get_level(f->crstep, f->ptep) > level, gmap->kvm))
+ return -EINVAL;
+
+ if (f->ptep) {
+ pgste = pgste_get_lock(f->ptep);
+ oldpte = *f->ptep;
+ newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
+ newpte.s.sd = oldpte.s.sd;
+ oldpte.s.sd = 0;
+ if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
+ pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, pgste, f->gfn);
+ if (f->callback)
+ f->callback(f);
+ } else {
+ rc = -EAGAIN;
+ }
+ pgste_set_unlock(f->ptep, pgste);
+ } else {
+ do {
+ oldval = READ_ONCE(*f->crstep);
+ newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
+ f->write_attempt | oldval.s.fc1.d);
+ newval.s.fc1.s = !f->page;
+ newval.s.fc1.sd = oldval.s.fc1.sd;
+ if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
+ crste_origin_large(oldval) != crste_origin_large(newval))
+ return -EAGAIN;
+ } while (!gmap_crstep_xchg_atomic(gmap, f->crstep, oldval, newval, f->gfn));
+ if (f->callback)
+ f->callback(f);
+ }
+
+ return rc;
+}
+
int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *f)
{
unsigned int order;
- int rc, level;
+ int level;
lockdep_assert_held(&gmap->kvm->mmu_lock);
@@ -638,16 +693,14 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
else if (order >= get_order(_SEGMENT_SIZE) && gmap_1m_allowed(gmap, f->gfn))
level = TABLE_TYPE_SEGMENT;
}
- rc = dat_link(mc, gmap->asce, level, uses_skeys(gmap), f);
- KVM_BUG_ON(rc == -EINVAL, gmap->kvm);
- return rc;
+ return _gmap_link(mc, gmap, level, f);
}
static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
gfn_t p_gfn, gfn_t c_gfn, bool force_alloc)
{
+ union crste newcrste, oldcrste;
struct page_table *pt;
- union crste newcrste;
union crste *crstep;
union pte *ptep;
int rc;
@@ -673,7 +726,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
&crstep, &ptep);
if (rc)
return rc;
- dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce);
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ if (oldcrste.val == newcrste.val)
+ break;
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce));
return 0;
}
@@ -777,8 +834,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn)
int rc;
rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep);
- if (!rc)
- dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce);
+ if (rc)
+ return;
+ while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce))
+ ;
}
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count)
@@ -1017,8 +1076,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level)
dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg));
return;
}
- crste = READ_ONCE(*crstep);
- dat_crstep_clear(crstep, r_gfn, sg->asce);
+
+ crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce);
if (crste_leaf(crste) || crste.h.i)
return;
if (is_pmd(crste))
@@ -1101,6 +1160,7 @@ struct gmap_protect_asce_top_level {
static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
struct gmap_protect_asce_top_level *context)
{
+ struct gmap *parent;
int rc, i;
guard(write_lock)(&sg->kvm->mmu_lock);
@@ -1108,7 +1168,12 @@ static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, s
if (kvm_s390_array_needs_retry_safe(sg->kvm, context->seq, context->f))
return -EAGAIN;
- scoped_guard(spinlock, &sg->parent->children_lock) {
+ parent = READ_ONCE(sg->parent);
+ if (!parent)
+ return -EAGAIN;
+ scoped_guard(spinlock, &parent->children_lock) {
+ if (READ_ONCE(sg->parent) != parent)
+ return -EAGAIN;
for (i = 0; i < CRST_TABLE_PAGES; i++) {
if (!context->f[i].valid)
continue;
@@ -1191,6 +1256,9 @@ struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *pare
struct gmap *sg, *new;
int rc;
+ if (WARN_ON(!parent))
+ return ERR_PTR(-EINVAL);
+
scoped_guard(spinlock, &parent->children_lock) {
sg = gmap_find_shadow(parent, asce, edat_level);
if (sg) {
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index ccb5cd751e31..579399ef5480 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -185,6 +185,8 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
else
_gmap_handle_vsie_unshadow_event(gmap, gfn);
}
+ if (!ptep->s.d && newpte.s.d && !newpte.s.s)
+ SetPageDirty(pfn_to_page(newpte.h.pfra));
return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap));
}
@@ -194,35 +196,42 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}
-static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
- gfn_t gfn, bool needs_lock)
+static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
+ union crste oldcrste, union crste newcrste,
+ gfn_t gfn, bool needs_lock)
{
- unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11);
+ unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES;
+
+ if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm))
+ return true;
lockdep_assert_held(&gmap->kvm->mmu_lock);
if (!needs_lock)
lockdep_assert_held(&gmap->children_lock);
gfn = ALIGN_DOWN(gfn, align);
- if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) {
- ne.s.fc1.prefix_notif = 0;
+ if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) {
+ newcrste.s.fc1.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
- if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif &&
- (ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) {
- ne.s.fc1.vsie_notif = 0;
+ if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
+ (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
+ newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
_gmap_handle_vsie_unshadow_event(gmap, gfn);
}
- dat_crstep_xchg(crstep, ne, gfn, gmap->asce);
+ if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s)
+ SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
+ return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
}
-static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
- gfn_t gfn)
+static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
+ union crste oldcrste, union crste newcrste,
+ gfn_t gfn)
{
- return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true);
+ return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true);
}
/**
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 18932a65ca68..7cb8ce833b62 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2724,6 +2724,9 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
bit = bit_nr + (addr % PAGE_SIZE) * 8;
+ /* kvm_set_routing_entry() should never allow this to happen */
+ WARN_ON_ONCE(bit > (PAGE_SIZE * BITS_PER_BYTE - 1));
+
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
}
@@ -2824,6 +2827,12 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
int rc;
mci.val = mcck_info->mcic;
+
+ /* log machine checks being reinjected on all debugs */
+ VCPU_EVENT(vcpu, 2, "guest machine check %lx", mci.val);
+ KVM_EVENT(2, "guest machine check %lx", mci.val);
+ pr_info("guest machine check pid %d: %lx", current->pid, mci.val);
+
if (mci.sr)
cr14 |= CR14_RECOVERY_SUBMASK;
if (mci.dg)
@@ -2852,6 +2861,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
+ const struct kvm_irq_routing_s390_adapter *adapter;
u64 uaddr_s, uaddr_i;
int idx;
@@ -2862,6 +2872,14 @@ int kvm_set_routing_entry(struct kvm *kvm,
return -EINVAL;
e->set = set_adapter_int;
+ adapter = &ue->u.adapter;
+ if (adapter->summary_addr + (adapter->summary_offset / 8) >=
+ (adapter->summary_addr & PAGE_MASK) + PAGE_SIZE)
+ return -EINVAL;
+ if (adapter->ind_addr + (adapter->ind_offset / 8) >=
+ (adapter->ind_addr & PAGE_MASK) + PAGE_SIZE)
+ return -EINVAL;
+
idx = srcu_read_lock(&kvm->srcu);
uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3eb60aa932ec..d7838334a338 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4617,7 +4617,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
return 0;
}
-static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int sie_return)
{
struct mcck_volatile_info *mcck_info;
struct sie_page *sie_page;
@@ -4633,14 +4633,14 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
- if (exit_reason == -EINTR) {
- VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ if (sie_return == SIE64_RETURN_MCCK) {
sie_page = container_of(vcpu->arch.sie_block,
struct sie_page, sie_block);
mcck_info = &sie_page->mcck_info;
kvm_s390_reinject_machine_check(vcpu, mcck_info);
return 0;
}
+ WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL);
if (vcpu->arch.sie_block->icptcode > 0) {
rc = kvm_handle_sie_intercept(vcpu);
@@ -4679,7 +4679,7 @@ int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
- int rc, exit_reason;
+ int rc, sie_return;
struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
/*
@@ -4719,9 +4719,9 @@ xfer_to_guest_mode_check:
guest_timing_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
- exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
- vcpu->run->s.regs.gprs,
- vcpu->arch.gmap->asce.val);
+ sie_return = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs,
+ vcpu->arch.gmap->asce.val);
__enable_cpu_timer_accounting(vcpu);
guest_timing_exit_irqoff();
@@ -4744,7 +4744,7 @@ xfer_to_guest_mode_check:
}
kvm_vcpu_srcu_read_lock(vcpu);
- rc = vcpu_post_run(vcpu, exit_reason);
+ rc = vcpu_post_run(vcpu, sie_return);
if (rc || guestdbg_exit_pending(vcpu)) {
kvm_vcpu_srcu_read_unlock(vcpu);
break;
@@ -5520,9 +5520,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = vcpu_dat_fault_handler(vcpu, arg, 0);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ gpa_t gaddr = arg;
+
+ scoped_guard(srcu, &vcpu->kvm->srcu) {
+ r = vcpu_ucontrol_translate(vcpu, &gaddr);
+ if (r)
+ break;
+
+ r = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(gaddr), false);
+ if (r == PGM_ADDRESSING)
+ r = -EFAULT;
+ if (r <= 0)
+ break;
+ r = -EIO;
+ KVM_BUG_ON(r, vcpu->kvm);
+ }
break;
}
case KVM_ENABLE_CAP:
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index d249b10044eb..72895dddc39a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1122,6 +1122,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ unsigned long sie_return = SIE64_RETURN_NORMAL;
int guest_bp_isolation;
int rc = 0;
@@ -1163,7 +1164,7 @@ xfer_to_guest_mode_check:
goto xfer_to_guest_mode_check;
}
guest_timing_enter_irqoff();
- rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val);
+ sie_return = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val);
guest_timing_exit_irqoff();
local_irq_enable();
}
@@ -1178,12 +1179,13 @@ skip_sie:
kvm_vcpu_srcu_read_lock(vcpu);
- if (rc == -EINTR) {
- VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ if (sie_return == SIE64_RETURN_MCCK) {
kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
return 0;
}
+ WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL);
+
if (rc > 0)
rc = 0; /* we could still have an icpt */
else if (current->thread.gmap_int_code)
@@ -1326,7 +1328,7 @@ static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- struct gmap *sg;
+ struct gmap *sg = NULL;
int rc = 0;
while (1) {
@@ -1366,6 +1368,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
sg = gmap_put(sg);
cond_resched();
}
+ if (sg)
+ sg = gmap_put(sg);
if (rc == -EFAULT) {
/*
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a52aa7a99b6b..191cc53caead 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -441,10 +441,17 @@ void do_secure_storage_access(struct pt_regs *regs)
folio = phys_to_folio(addr);
if (unlikely(!folio_try_get(folio)))
return;
- rc = arch_make_folio_accessible(folio);
+ rc = uv_convert_from_secure(folio_to_phys(folio));
+ if (!rc)
+ clear_bit(PG_arch_1, &folio->flags.f);
folio_put(folio);
+ /*
+ * There are some valid fixup types for kernel
+ * accesses to donated secure memory. zeropad is one
+ * of them.
+ */
if (rc)
- BUG();
+ return handle_fault_error_nolock(regs, 0);
} else {
if (faulthandler_disabled())
return handle_fault_error_nolock(regs, 0);
diff --git a/arch/x86/coco/sev/noinstr.c b/arch/x86/coco/sev/noinstr.c
index 9d94aca4a698..5afd663a1c21 100644
--- a/arch/x86/coco/sev/noinstr.c
+++ b/arch/x86/coco/sev/noinstr.c
@@ -121,6 +121,9 @@ noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
WARN_ON(!irqs_disabled());
+ if (!sev_cfg.ghcbs_initialized)
+ return boot_ghcb;
+
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
@@ -164,6 +167,9 @@ noinstr void __sev_put_ghcb(struct ghcb_state *state)
WARN_ON(!irqs_disabled());
+ if (!sev_cfg.ghcbs_initialized)
+ return;
+
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index 88c757ac8ccd..fbe2d10dd737 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -177,6 +177,16 @@ static noinstr void fred_extint(struct pt_regs *regs)
}
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+noinstr void exc_vmm_communication(struct pt_regs *regs, unsigned long error_code)
+{
+ if (user_mode(regs))
+ return user_exc_vmm_communication(regs, error_code);
+ else
+ return kernel_exc_vmm_communication(regs, error_code);
+}
+#endif
+
static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
{
/* Optimize for #PF. That's the only exception which matters performance wise */
@@ -207,6 +217,10 @@ static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
#ifdef CONFIG_X86_CET
case X86_TRAP_CP: return exc_control_protection(regs, error_code);
#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ case X86_TRAP_VC: return exc_vmm_communication(regs, error_code);
+#endif
+
default: return fred_bad_type(regs, error_code);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a8ff4376c286..ec0670114efa 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -433,7 +433,20 @@ static __always_inline void setup_lass(struct cpuinfo_x86 *c)
/* These bits should not change their value after CPU init is finished. */
static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
- X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED;
+ X86_CR4_FSGSBASE | X86_CR4_CET;
+
+/*
+ * The CR pinning protects against ROP on the 'mov %reg, %CRn' instruction(s).
+ * Since you can ROP directly to these instructions (barring shadow stack),
+ * any protection must follow immediately and unconditionally after that.
+ *
+ * Specifically, the CR[04] write functions below will have the value
+ * validation controlled by the @cr_pinning static_branch which is
+ * __ro_after_init, just like the cr4_pinned_bits value.
+ *
+ * Once set, an attacker will have to defeat page-tables to get around these
+ * restrictions. Which is a much bigger ask than 'simple' ROP.
+ */
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -2050,12 +2063,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_umip(c);
setup_lass(c);
- /* Enable FSGSBASE instructions if available. */
- if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
- cr4_set_bits(X86_CR4_FSGSBASE);
- elf_hwcap2 |= HWCAP2_FSGSBASE;
- }
-
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
@@ -2416,6 +2423,18 @@ void cpu_init_exception_handling(bool boot_cpu)
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
+ /*
+ * On CPUs with FSGSBASE support, paranoid_entry() uses
+ * ALTERNATIVE-patched RDGSBASE/WRGSBASE instructions. Secondary CPUs
+ * boot after alternatives are patched globally, so early exceptions
+ * execute patched code that depends on FSGSBASE. Enable the feature
+ * before any exceptions occur.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_FSGSBASE)) {
+ cr4_set_bits(X86_CR4_FSGSBASE);
+ elf_hwcap2 |= HWCAP2_FSGSBASE;
+ }
+
if (cpu_feature_enabled(X86_FEATURE_FRED)) {
/* The boot CPU has enabled FRED during early boot */
if (!boot_cpu)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b922a8b00057..dd06453d5b72 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3044,12 +3044,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
bool prefetch = !fault || fault->prefetch;
bool write_fault = fault && fault->write;
- if (unlikely(is_noslot_pfn(pfn))) {
- vcpu->stat.pf_mmio_spte_created++;
- mark_mmio_spte(vcpu, sptep, gfn, pte_access);
- return RET_PF_EMULATE;
- }
-
if (is_shadow_present_pte(*sptep)) {
if (prefetch && is_last_spte(*sptep, level) &&
pfn == spte_to_pfn(*sptep))
@@ -3066,13 +3060,22 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
child = spte_to_child_sp(pte);
drop_parent_pte(vcpu->kvm, child, sptep);
flush = true;
- } else if (WARN_ON_ONCE(pfn != spte_to_pfn(*sptep))) {
+ } else if (pfn != spte_to_pfn(*sptep)) {
+ WARN_ON_ONCE(vcpu->arch.mmu->root_role.direct);
drop_spte(vcpu->kvm, sptep);
flush = true;
} else
was_rmapped = 1;
}
+ if (unlikely(is_noslot_pfn(pfn))) {
+ vcpu->stat.pf_mmio_spte_created++;
+ mark_mmio_spte(vcpu, sptep, gfn, pte_access);
+ if (flush)
+ kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
+ return RET_PF_EMULATE;
+ }
+
wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch,
false, host_writable, &spte);
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 35caa5746115..79f0818131e8 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -424,7 +424,7 @@ void __init efi_unmap_boot_services(void)
if (efi_enabled(EFI_DBG))
return;
- sz = sizeof(*ranges_to_free) * efi.memmap.nr_map + 1;
+ sz = sizeof(*ranges_to_free) * (efi.memmap.nr_map + 1);
ranges_to_free = kzalloc(sz, GFP_KERNEL);
if (!ranges_to_free) {
pr_err("Failed to allocate storage for freeable EFI regions\n");
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 0bb609fbec7d..8e0199394984 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -623,8 +623,10 @@ static int af_alg_alloc_tsgl(struct sock *sk)
sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
sgl->cur = 0;
- if (sg)
+ if (sg) {
+ sg_unmark_end(sg + MAX_SGL_ENTS - 1);
sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
+ }
list_add_tail(&sgl->list, &ctx->tsgl_list);
}
@@ -635,15 +637,13 @@ static int af_alg_alloc_tsgl(struct sock *sk)
/**
* af_alg_count_tsgl - Count number of TX SG entries
*
- * The counting starts from the beginning of the SGL to @bytes. If
- * an @offset is provided, the counting of the SG entries starts at the @offset.
+ * The counting starts from the beginning of the SGL to @bytes.
*
* @sk: socket of connection to user space
* @bytes: Count the number of SG entries holding given number of bytes.
- * @offset: Start the counting of SG entries from the given offset.
* Return: Number of TX SG entries found given the constraints
*/
-unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset)
+unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes)
{
const struct alg_sock *ask = alg_sk(sk);
const struct af_alg_ctx *ctx = ask->private;
@@ -658,25 +658,11 @@ unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset)
const struct scatterlist *sg = sgl->sg;
for (i = 0; i < sgl->cur; i++) {
- size_t bytes_count;
-
- /* Skip offset */
- if (offset >= sg[i].length) {
- offset -= sg[i].length;
- bytes -= sg[i].length;
- continue;
- }
-
- bytes_count = sg[i].length - offset;
-
- offset = 0;
sgl_count++;
-
- /* If we have seen requested number of bytes, stop */
- if (bytes_count >= bytes)
+ if (sg[i].length >= bytes)
return sgl_count;
- bytes -= bytes_count;
+ bytes -= sg[i].length;
}
}
@@ -688,19 +674,14 @@ EXPORT_SYMBOL_GPL(af_alg_count_tsgl);
* af_alg_pull_tsgl - Release the specified buffers from TX SGL
*
* If @dst is non-null, reassign the pages to @dst. The caller must release
- * the pages. If @dst_offset is given only reassign the pages to @dst starting
- * at the @dst_offset (byte). The caller must ensure that @dst is large
- * enough (e.g. by using af_alg_count_tsgl with the same offset).
+ * the pages.
*
* @sk: socket of connection to user space
* @used: Number of bytes to pull from TX SGL
* @dst: If non-NULL, buffer is reassigned to dst SGL instead of releasing. The
* caller must release the buffers in dst.
- * @dst_offset: Reassign the TX SGL from given offset. All buffers before
- * reaching the offset is released.
*/
-void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
- size_t dst_offset)
+void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst)
{
struct alg_sock *ask = alg_sk(sk);
struct af_alg_ctx *ctx = ask->private;
@@ -725,18 +706,10 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
* SG entries in dst.
*/
if (dst) {
- if (dst_offset >= plen) {
- /* discard page before offset */
- dst_offset -= plen;
- } else {
- /* reassign page to dst after offset */
- get_page(page);
- sg_set_page(dst + j, page,
- plen - dst_offset,
- sg[i].offset + dst_offset);
- dst_offset = 0;
- j++;
- }
+ /* reassign page to dst after offset */
+ get_page(page);
+ sg_set_page(dst + j, page, plen, sg[i].offset);
+ j++;
}
sg[i].length -= plen;
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 79b016a899a1..dda15bb05e89 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -26,7 +26,6 @@
#include <crypto/internal/aead.h>
#include <crypto/scatterwalk.h>
#include <crypto/if_alg.h>
-#include <crypto/skcipher.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/kernel.h>
@@ -72,9 +71,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
struct alg_sock *pask = alg_sk(psk);
struct af_alg_ctx *ctx = ask->private;
struct crypto_aead *tfm = pask->private;
- unsigned int i, as = crypto_aead_authsize(tfm);
+ unsigned int as = crypto_aead_authsize(tfm);
struct af_alg_async_req *areq;
- struct af_alg_tsgl *tsgl, *tmp;
struct scatterlist *rsgl_src, *tsgl_src = NULL;
int err = 0;
size_t used = 0; /* [in] TX bufs to be en/decrypted */
@@ -154,23 +152,24 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
outlen -= less;
}
+ /*
+ * Create a per request TX SGL for this request which tracks the
+ * SG entries from the global TX SGL.
+ */
processed = used + ctx->aead_assoclen;
- list_for_each_entry_safe(tsgl, tmp, &ctx->tsgl_list, list) {
- for (i = 0; i < tsgl->cur; i++) {
- struct scatterlist *process_sg = tsgl->sg + i;
-
- if (!(process_sg->length) || !sg_page(process_sg))
- continue;
- tsgl_src = process_sg;
- break;
- }
- if (tsgl_src)
- break;
- }
- if (processed && !tsgl_src) {
- err = -EFAULT;
+ areq->tsgl_entries = af_alg_count_tsgl(sk, processed);
+ if (!areq->tsgl_entries)
+ areq->tsgl_entries = 1;
+ areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl),
+ areq->tsgl_entries),
+ GFP_KERNEL);
+ if (!areq->tsgl) {
+ err = -ENOMEM;
goto free;
}
+ sg_init_table(areq->tsgl, areq->tsgl_entries);
+ af_alg_pull_tsgl(sk, processed, areq->tsgl);
+ tsgl_src = areq->tsgl;
/*
* Copy of AAD from source to destination
@@ -179,76 +178,15 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
* when user space uses an in-place cipher operation, the kernel
* will copy the data as it does not see whether such in-place operation
* is initiated.
- *
- * To ensure efficiency, the following implementation ensure that the
- * ciphers are invoked to perform a crypto operation in-place. This
- * is achieved by memory management specified as follows.
*/
/* Use the RX SGL as source (and destination) for crypto op. */
rsgl_src = areq->first_rsgl.sgl.sgt.sgl;
- if (ctx->enc) {
- /*
- * Encryption operation - The in-place cipher operation is
- * achieved by the following operation:
- *
- * TX SGL: AAD || PT
- * | |
- * | copy |
- * v v
- * RX SGL: AAD || PT || Tag
- */
- memcpy_sglist(areq->first_rsgl.sgl.sgt.sgl, tsgl_src,
- processed);
- af_alg_pull_tsgl(sk, processed, NULL, 0);
- } else {
- /*
- * Decryption operation - To achieve an in-place cipher
- * operation, the following SGL structure is used:
- *
- * TX SGL: AAD || CT || Tag
- * | | ^
- * | copy | | Create SGL link.
- * v v |
- * RX SGL: AAD || CT ----+
- */
-
- /* Copy AAD || CT to RX SGL buffer for in-place operation. */
- memcpy_sglist(areq->first_rsgl.sgl.sgt.sgl, tsgl_src, outlen);
-
- /* Create TX SGL for tag and chain it to RX SGL. */
- areq->tsgl_entries = af_alg_count_tsgl(sk, processed,
- processed - as);
- if (!areq->tsgl_entries)
- areq->tsgl_entries = 1;
- areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl),
- areq->tsgl_entries),
- GFP_KERNEL);
- if (!areq->tsgl) {
- err = -ENOMEM;
- goto free;
- }
- sg_init_table(areq->tsgl, areq->tsgl_entries);
-
- /* Release TX SGL, except for tag data and reassign tag data. */
- af_alg_pull_tsgl(sk, processed, areq->tsgl, processed - as);
-
- /* chain the areq TX SGL holding the tag with RX SGL */
- if (usedpages) {
- /* RX SGL present */
- struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl;
- struct scatterlist *sg = sgl_prev->sgt.sgl;
-
- sg_unmark_end(sg + sgl_prev->sgt.nents - 1);
- sg_chain(sg, sgl_prev->sgt.nents + 1, areq->tsgl);
- } else
- /* no RX SGL present (e.g. authentication only) */
- rsgl_src = areq->tsgl;
- }
+ memcpy_sglist(rsgl_src, tsgl_src, ctx->aead_assoclen);
/* Initialize the crypto operation */
- aead_request_set_crypt(&areq->cra_u.aead_req, rsgl_src,
+ aead_request_set_crypt(&areq->cra_u.aead_req, tsgl_src,
areq->first_rsgl.sgl.sgt.sgl, used, ctx->iv);
aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen);
aead_request_set_tfm(&areq->cra_u.aead_req, tfm);
@@ -450,7 +388,7 @@ static void aead_sock_destruct(struct sock *sk)
struct crypto_aead *tfm = pask->private;
unsigned int ivlen = crypto_aead_ivsize(tfm);
- af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
+ af_alg_pull_tsgl(sk, ctx->used, NULL);
sock_kzfree_s(sk, ctx->iv, ivlen);
sock_kfree_s(sk, ctx, ctx->len);
af_alg_release_parent(sk);
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 125d395c5e00..82735e51be10 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -138,7 +138,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
* Create a per request TX SGL for this request which tracks the
* SG entries from the global TX SGL.
*/
- areq->tsgl_entries = af_alg_count_tsgl(sk, len, 0);
+ areq->tsgl_entries = af_alg_count_tsgl(sk, len);
if (!areq->tsgl_entries)
areq->tsgl_entries = 1;
areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl),
@@ -149,7 +149,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
goto free;
}
sg_init_table(areq->tsgl, areq->tsgl_entries);
- af_alg_pull_tsgl(sk, len, areq->tsgl, 0);
+ af_alg_pull_tsgl(sk, len, areq->tsgl);
/* Initialize the crypto operation */
skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm);
@@ -363,7 +363,7 @@ static void skcipher_sock_destruct(struct sock *sk)
struct alg_sock *pask = alg_sk(psk);
struct crypto_skcipher *tfm = pask->private;
- af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
+ af_alg_pull_tsgl(sk, ctx->used, NULL);
sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
if (ctx->state)
sock_kzfree_s(sk, ctx->state, crypto_skcipher_statesize(tfm));
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 542a978663b9..c0a01d738d9b 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -207,6 +207,7 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req,
u8 *ohash = areq_ctx->tail;
unsigned int cryptlen = req->cryptlen - authsize;
unsigned int assoclen = req->assoclen;
+ struct scatterlist *src = req->src;
struct scatterlist *dst = req->dst;
u8 *ihash = ohash + crypto_ahash_digestsize(auth);
u32 tmp[2];
@@ -214,23 +215,27 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req,
if (!authsize)
goto decrypt;
- /* Move high-order bits of sequence number back. */
- scatterwalk_map_and_copy(tmp, dst, 4, 4, 0);
- scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 0);
- scatterwalk_map_and_copy(tmp, dst, 0, 8, 1);
+ if (src == dst) {
+ /* Move high-order bits of sequence number back. */
+ scatterwalk_map_and_copy(tmp, dst, 4, 4, 0);
+ scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 0);
+ scatterwalk_map_and_copy(tmp, dst, 0, 8, 1);
+ } else
+ memcpy_sglist(dst, src, assoclen);
if (crypto_memneq(ihash, ohash, authsize))
return -EBADMSG;
decrypt:
- sg_init_table(areq_ctx->dst, 2);
+ if (src != dst)
+ src = scatterwalk_ffwd(areq_ctx->src, src, assoclen);
dst = scatterwalk_ffwd(areq_ctx->dst, dst, assoclen);
skcipher_request_set_tfm(skreq, ctx->enc);
skcipher_request_set_callback(skreq, flags,
req->base.complete, req->base.data);
- skcipher_request_set_crypt(skreq, dst, dst, cryptlen, req->iv);
+ skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv);
return crypto_skcipher_decrypt(skreq);
}
@@ -255,6 +260,7 @@ static int crypto_authenc_esn_decrypt(struct aead_request *req)
unsigned int assoclen = req->assoclen;
unsigned int cryptlen = req->cryptlen;
u8 *ihash = ohash + crypto_ahash_digestsize(auth);
+ struct scatterlist *src = req->src;
struct scatterlist *dst = req->dst;
u32 tmp[2];
int err;
@@ -262,24 +268,28 @@ static int crypto_authenc_esn_decrypt(struct aead_request *req)
if (assoclen < 8)
return -EINVAL;
- cryptlen -= authsize;
-
- if (req->src != dst)
- memcpy_sglist(dst, req->src, assoclen + cryptlen);
+ if (!authsize)
+ goto tail;
+ cryptlen -= authsize;
scatterwalk_map_and_copy(ihash, req->src, assoclen + cryptlen,
authsize, 0);
- if (!authsize)
- goto tail;
-
/* Move high-order bits of sequence number to the end. */
- scatterwalk_map_and_copy(tmp, dst, 0, 8, 0);
- scatterwalk_map_and_copy(tmp, dst, 4, 4, 1);
- scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 1);
-
- sg_init_table(areq_ctx->dst, 2);
- dst = scatterwalk_ffwd(areq_ctx->dst, dst, 4);
+ scatterwalk_map_and_copy(tmp, src, 0, 8, 0);
+ if (src == dst) {
+ scatterwalk_map_and_copy(tmp, dst, 4, 4, 1);
+ scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 1);
+ dst = scatterwalk_ffwd(areq_ctx->dst, dst, 4);
+ } else {
+ scatterwalk_map_and_copy(tmp, dst, 0, 4, 1);
+ scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen - 4, 4, 1);
+
+ src = scatterwalk_ffwd(areq_ctx->src, src, 8);
+ dst = scatterwalk_ffwd(areq_ctx->dst, dst, 4);
+ memcpy_sglist(dst, src, assoclen + cryptlen - 8);
+ dst = req->dst;
+ }
ahash_request_set_tfm(ahreq, auth);
ahash_request_set_crypt(ahreq, dst, ohash, assoclen + cryptlen);
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 46fc7def8d4c..710ebba7ce85 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -164,18 +164,21 @@ static int deflate_decompress_one(struct acomp_req *req,
do {
unsigned int dcur;
+ unsigned long avail_in;
dcur = acomp_walk_next_dst(&walk);
- if (!dcur) {
- out_of_space = true;
- break;
- }
stream->avail_out = dcur;
stream->next_out = walk.dst.virt.addr;
+ avail_in = stream->avail_in;
ret = zlib_inflate(stream, Z_NO_FLUSH);
+ if (!dcur && avail_in == stream->avail_in) {
+ out_of_space = true;
+ break;
+ }
+
dcur -= stream->avail_out;
acomp_walk_done_dst(&walk, dcur);
} while (ret == Z_OK && stream->avail_in);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 5b34b6f50e69..f1b6155065ff 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -35,6 +35,7 @@
#define IVPU_HW_IP_60XX 60
#define IVPU_HW_IP_REV_LNL_B0 4
+#define IVPU_HW_IP_REV_NVL_A0 0
#define IVPU_HW_BTRS_MTL 1
#define IVPU_HW_BTRS_LNL 2
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index d69cd0d93569..d4a9bcda4100 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -70,8 +70,10 @@ static void wa_init(struct ivpu_device *vdev)
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
vdev->wa.interrupt_clear_with_0 = ivpu_hw_btrs_irqs_clear_with_0_mtl(vdev);
- if (ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL &&
- ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0)
+ if ((ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL &&
+ ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) ||
+ (ivpu_device_id(vdev) == PCI_DEVICE_ID_NVL &&
+ ivpu_revision(vdev) == IVPU_HW_IP_REV_NVL_A0))
vdev->wa.disable_clock_relinquish = true;
if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_ENABLE)
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 5f63ed120a2c..6f0065257a77 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1656,6 +1656,8 @@ static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool ca
ret = ec_install_handlers(ec, device, call_reg);
if (ret) {
+ ec_remove_handlers(ec);
+
if (ec == first_ec)
first_ec = NULL;
diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c
index defb0573e43c..c7a962728752 100644
--- a/drivers/auxdisplay/lcd2s.c
+++ b/drivers/auxdisplay/lcd2s.c
@@ -99,8 +99,13 @@ static int lcd2s_print(struct charlcd *lcd, int c)
{
struct lcd2s_data *lcd2s = lcd->drvdata;
u8 buf[2] = { LCD2S_CMD_WRITE, c };
+ int ret;
- lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf));
+ ret = lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf));
+ if (ret < 0)
+ return ret;
+ if (ret != sizeof(buf))
+ return -EIO;
return 0;
}
@@ -108,9 +113,13 @@ static int lcd2s_gotoxy(struct charlcd *lcd, unsigned int x, unsigned int y)
{
struct lcd2s_data *lcd2s = lcd->drvdata;
u8 buf[3] = { LCD2S_CMD_CUR_POS, y + 1, x + 1 };
+ int ret;
- lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf));
-
+ ret = lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf));
+ if (ret < 0)
+ return ret;
+ if (ret != sizeof(buf))
+ return -EIO;
return 0;
}
diff --git a/drivers/auxdisplay/line-display.c b/drivers/auxdisplay/line-display.c
index 81b4aac65807..fb6d9294140d 100644
--- a/drivers/auxdisplay/line-display.c
+++ b/drivers/auxdisplay/line-display.c
@@ -365,7 +365,7 @@ static DEFINE_IDA(linedisp_id);
static void linedisp_release(struct device *dev)
{
- struct linedisp *linedisp = to_linedisp(dev);
+ struct linedisp *linedisp = container_of(dev, struct linedisp, dev);
kfree(linedisp->map);
kfree(linedisp->message);
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 607c1246d994..e388b19850e3 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1545,6 +1545,7 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg,
unsigned int val_num)
{
void *orig_work_buf;
+ unsigned int selector_reg;
unsigned int win_offset;
unsigned int win_page;
bool page_chg;
@@ -1563,10 +1564,31 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg,
return -EINVAL;
}
- /* It is possible to have selector register inside data window.
- In that case, selector register is located on every page and
- it needs no page switching, when accessed alone. */
+ /*
+ * Calculate the address of the selector register in the corresponding
+ * data window if it is located on every page.
+ */
+ page_chg = in_range(range->selector_reg, range->window_start, range->window_len);
+ if (page_chg)
+ selector_reg = range->range_min + win_page * range->window_len +
+ range->selector_reg - range->window_start;
+
+ /*
+ * It is possible to have selector register inside data window.
+ * In that case, selector register is located on every page and it
+ * needs no page switching, when accessed alone.
+ *
+ * Nevertheless we should synchronize the cache values for it.
+ * This can't be properly achieved if the selector register is
+ * the first and the only one to be read inside the data window.
+ * That's why we update it in that case as well.
+ *
+ * However, we specifically avoid updating it for the default page,
+ * when it's overlapped with the real data window, to prevent from
+ * infinite looping.
+ */
if (val_num > 1 ||
+ (page_chg && selector_reg != range->selector_reg) ||
range->window_start + win_offset != range->selector_reg) {
/* Use separate work_buf during page switching */
orig_work_buf = map->work_buf;
@@ -1575,7 +1597,7 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg,
ret = _regmap_update_bits(map, range->selector_reg,
range->selector_mask,
win_page << range->selector_shift,
- &page_chg, false);
+ NULL, false);
map->work_buf = orig_work_buf;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index a324ede6206d..af679375b193 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -917,9 +917,8 @@ static void zram_account_writeback_submit(struct zram *zram)
static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
{
- u32 size, index = req->pps->index;
- int err, prio;
- bool huge;
+ u32 index = req->pps->index;
+ int err;
err = blk_status_to_errno(req->bio.bi_status);
if (err) {
@@ -946,28 +945,13 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
goto out;
}
- if (zram->compressed_wb) {
- /*
- * ZRAM_WB slots get freed, we need to preserve data required
- * for read decompression.
- */
- size = get_slot_size(zram, index);
- prio = get_slot_comp_priority(zram, index);
- huge = test_slot_flag(zram, index, ZRAM_HUGE);
- }
-
- slot_free(zram, index);
- set_slot_flag(zram, index, ZRAM_WB);
+ clear_slot_flag(zram, index, ZRAM_IDLE);
+ if (test_slot_flag(zram, index, ZRAM_HUGE))
+ atomic64_dec(&zram->stats.huge_pages);
+ atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size);
+ zs_free(zram->mem_pool, get_slot_handle(zram, index));
set_slot_handle(zram, index, req->blk_idx);
-
- if (zram->compressed_wb) {
- if (huge)
- set_slot_flag(zram, index, ZRAM_HUGE);
- set_slot_size(zram, index, size);
- set_slot_comp_priority(zram, index, prio);
- }
-
- atomic64_inc(&zram->stats.pages_stored);
+ set_slot_flag(zram, index, ZRAM_WB);
out:
slot_unlock(zram, index);
@@ -2010,8 +1994,13 @@ static void slot_free(struct zram *zram, u32 index)
set_slot_comp_priority(zram, index, 0);
if (test_slot_flag(zram, index, ZRAM_HUGE)) {
+ /*
+ * Writeback completion decrements ->huge_pages but keeps
+ * ZRAM_HUGE flag for deferred decompression path.
+ */
+ if (!test_slot_flag(zram, index, ZRAM_WB))
+ atomic64_dec(&zram->stats.huge_pages);
clear_slot_flag(zram, index, ZRAM_HUGE);
- atomic64_dec(&zram->stats.huge_pages);
}
if (test_slot_flag(zram, index, ZRAM_WB)) {
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 246b6205c5e0..ab146894ba4e 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -251,11 +251,13 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code)
bt_dev_err(hdev, "Hardware error 0x%2.2x", code);
+ hci_req_sync_lock(hdev);
+
skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Reset after hardware error failed (%ld)",
PTR_ERR(skb));
- return;
+ goto unlock;
}
kfree_skb(skb);
@@ -263,18 +265,21 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code)
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Retrieving Intel exception info failed (%ld)",
PTR_ERR(skb));
- return;
+ goto unlock;
}
if (skb->len != 13) {
bt_dev_err(hdev, "Exception info size mismatch");
kfree_skb(skb);
- return;
+ goto unlock;
}
bt_dev_err(hdev, "Exception info %s", (char *)(skb->data + 1));
kfree_skb(skb);
+
+unlock:
+ hci_req_sync_unlock(hdev);
}
EXPORT_SYMBOL_GPL(btintel_hw_error);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index a1c5eb993e47..5c535f3ab722 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2376,8 +2376,11 @@ static void btusb_work(struct work_struct *work)
if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_CVSD) {
if (hdev->voice_setting & 0x0020) {
static const int alts[3] = { 2, 4, 5 };
+ unsigned int sco_idx;
- new_alts = alts[data->sco_num - 1];
+ sco_idx = min_t(unsigned int, data->sco_num - 1,
+ ARRAY_SIZE(alts) - 1);
+ new_alts = alts[sco_idx];
} else {
new_alts = data->sco_num;
}
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 07cc2a79a77b..a889a66a326f 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -109,9 +109,6 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count)
{
struct h4_struct *h4 = hu->priv;
- if (!test_bit(HCI_UART_REGISTERED, &hu->flags))
- return -EUNATCH;
-
h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count,
h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts));
if (IS_ERR(h4->rx_skb)) {
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 91acf24f1ef5..91c96ad12342 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -541,6 +541,8 @@ static int download_firmware(struct ll_device *lldev)
if (err || !fw->data || !fw->size) {
bt_dev_err(lldev->hu.hdev, "request_firmware failed(errno %d) for %s",
err, bts_scr_name);
+ if (!err)
+ release_firmware(fw);
return -EINVAL;
}
ptr = (void *)fw->data;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 277884d91913..1f794524a1d9 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1427,12 +1427,9 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
* If there is a problem with its frequency table, take it
* offline and drop it.
*/
- if (policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_ASCENDING &&
- policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_DESCENDING) {
- ret = cpufreq_table_validate_and_sort(policy);
- if (ret)
- goto out_offline_policy;
- }
+ ret = cpufreq_table_validate_and_sort(policy);
+ if (ret)
+ goto out_offline_policy;
/* related_cpus should at least include policy->cpus. */
cpumask_copy(policy->related_cpus, policy->cpus);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index e0e847764511..df01d33993d8 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -313,6 +313,17 @@ static void cs_start(struct cpufreq_policy *policy)
dbs_info->requested_freq = policy->cur;
}
+static void cs_limits(struct cpufreq_policy *policy)
+{
+ struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
+
+ /*
+ * The limits have changed, so may have the current frequency. Reset
+ * requested_freq to avoid any unintended outcomes due to the mismatch.
+ */
+ dbs_info->requested_freq = policy->cur;
+}
+
static struct dbs_governor cs_governor = {
.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"),
.kobj_type = { .default_groups = cs_groups },
@@ -322,6 +333,7 @@ static struct dbs_governor cs_governor = {
.init = cs_init,
.exit = cs_exit,
.start = cs_start,
+ .limits = cs_limits,
};
#define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 36eb7aee4bcd..acf101878733 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -563,6 +563,7 @@ EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop);
void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy)
{
+ struct dbs_governor *gov = dbs_governor_of(policy);
struct policy_dbs_info *policy_dbs;
/* Protect gov->gdbs_data against cpufreq_dbs_governor_exit() */
@@ -574,6 +575,8 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy)
mutex_lock(&policy_dbs->update_mutex);
cpufreq_policy_apply_limits(policy);
gov_update_sample_delay(policy_dbs, 0);
+ if (gov->limits)
+ gov->limits(policy);
mutex_unlock(&policy_dbs->update_mutex);
out:
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 168c23fd7fca..1462d59277bd 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -138,6 +138,7 @@ struct dbs_governor {
int (*init)(struct dbs_data *dbs_data);
void (*exit)(struct dbs_data *dbs_data);
void (*start)(struct cpufreq_policy *policy);
+ void (*limits)(struct cpufreq_policy *policy);
};
static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy)
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 7f251daf03ce..5b364d8da4f9 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -360,6 +360,10 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy)
if (policy_has_boost_freq(policy))
policy->boost_supported = true;
+ if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING ||
+ policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_DESCENDING)
+ return 0;
+
return set_freq_table_sorted(policy);
}
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index 167372936ca7..78964e1712e5 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -3326,9 +3326,10 @@ static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key,
if (aligned_len < keylen)
return -EOVERFLOW;
- hashed_key = kmemdup(key, aligned_len, GFP_KERNEL);
+ hashed_key = kmalloc(aligned_len, GFP_KERNEL);
if (!hashed_key)
return -ENOMEM;
+ memcpy(hashed_key, key, keylen);
ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize);
if (ret)
goto bad_free_key;
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 628c43a7efc4..44122208f70c 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -441,9 +441,10 @@ static int ahash_setkey(struct crypto_ahash *ahash,
if (aligned_len < keylen)
return -EOVERFLOW;
- hashed_key = kmemdup(key, keylen, GFP_KERNEL);
+ hashed_key = kmalloc(aligned_len, GFP_KERNEL);
if (!hashed_key)
return -ENOMEM;
+ memcpy(hashed_key, key, keylen);
ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize);
if (ret)
goto bad_free_key;
diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c
index 0e07d0523291..9210cceb4b7b 100644
--- a/drivers/crypto/tegra/tegra-se-aes.c
+++ b/drivers/crypto/tegra/tegra-se-aes.c
@@ -529,7 +529,7 @@ static struct tegra_se_alg tegra_aes_algs[] = {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-tegra",
.cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_aes_ctx),
.cra_alignmask = 0xf,
@@ -550,7 +550,7 @@ static struct tegra_se_alg tegra_aes_algs[] = {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-tegra",
.cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_aes_ctx),
.cra_alignmask = 0xf,
@@ -572,7 +572,7 @@ static struct tegra_se_alg tegra_aes_algs[] = {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-tegra",
.cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct tegra_aes_ctx),
.cra_alignmask = 0xf,
@@ -594,6 +594,7 @@ static struct tegra_se_alg tegra_aes_algs[] = {
.cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-tegra",
.cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_aes_ctx),
.cra_alignmask = (__alignof__(u64) - 1),
@@ -1922,6 +1923,7 @@ static struct tegra_se_alg tegra_aead_algs[] = {
.cra_name = "gcm(aes)",
.cra_driver_name = "gcm-aes-tegra",
.cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct tegra_aead_ctx),
.cra_alignmask = 0xf,
@@ -1944,6 +1946,7 @@ static struct tegra_se_alg tegra_aead_algs[] = {
.cra_name = "ccm(aes)",
.cra_driver_name = "ccm-aes-tegra",
.cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct tegra_aead_ctx),
.cra_alignmask = 0xf,
@@ -1971,7 +1974,7 @@ static struct tegra_se_alg tegra_cmac_algs[] = {
.cra_name = "cmac(aes)",
.cra_driver_name = "tegra-se-cmac",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_cmac_ctx),
.cra_alignmask = 0,
diff --git a/drivers/crypto/tegra/tegra-se-hash.c b/drivers/crypto/tegra/tegra-se-hash.c
index 4a298ace6e9f..06bb5bf0fa33 100644
--- a/drivers/crypto/tegra/tegra-se-hash.c
+++ b/drivers/crypto/tegra/tegra-se-hash.c
@@ -761,7 +761,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha1",
.cra_driver_name = "tegra-se-sha1",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -786,7 +786,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha224",
.cra_driver_name = "tegra-se-sha224",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -811,7 +811,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha256",
.cra_driver_name = "tegra-se-sha256",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -836,7 +836,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha384",
.cra_driver_name = "tegra-se-sha384",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -861,7 +861,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha512",
.cra_driver_name = "tegra-se-sha512",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -886,7 +886,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha3-224",
.cra_driver_name = "tegra-se-sha3-224",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA3_224_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -911,7 +911,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha3-256",
.cra_driver_name = "tegra-se-sha3-256",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA3_256_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -936,7 +936,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha3-384",
.cra_driver_name = "tegra-se-sha3-384",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA3_384_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -961,7 +961,7 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "sha3-512",
.cra_driver_name = "tegra-se-sha3-512",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = SHA3_512_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -988,7 +988,8 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "hmac(sha224)",
.cra_driver_name = "tegra-se-hmac-sha224",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -1015,7 +1016,8 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "hmac(sha256)",
.cra_driver_name = "tegra-se-hmac-sha256",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -1042,7 +1044,8 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "hmac(sha384)",
.cra_driver_name = "tegra-se-hmac-sha384",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
@@ -1069,7 +1072,8 @@ static struct tegra_se_alg tegra_hash_algs[] = {
.cra_name = "hmac(sha512)",
.cra_driver_name = "tegra-se-hmac-sha512",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct tegra_sha_ctx),
.cra_alignmask = 0,
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 4589bf11d3fe..80aeb0d556bd 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -59,6 +59,7 @@ config CXL_ACPI
tristate "CXL ACPI: Platform Support"
depends on ACPI
depends on ACPI_NUMA
+ depends on CXL_PMEM || !CXL_PMEM
default CXL_BUS
select ACPI_TABLE_LIB
select ACPI_HMAT
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index c222e98ae736..cb5d5a047a9d 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -94,7 +94,6 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
struct cxl_hdm *cxlhdm;
void __iomem *hdm;
u32 ctrl;
- int i;
if (!info)
return false;
@@ -113,22 +112,16 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
return false;
/*
- * If any decoders are committed already, there should not be any
- * emulated DVSEC decoders.
+ * If HDM decoders are globally enabled, do not fall back to DVSEC
+ * range emulation. Zeroed decoder registers after region teardown
+ * do not imply absence of HDM capability.
+ *
+ * Falling back to DVSEC here would treat the decoder as AUTO and
+ * may incorrectly latch default interleave settings.
*/
- for (i = 0; i < cxlhdm->decoder_count; i++) {
- ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
- dev_dbg(&info->port->dev,
- "decoder%d.%d: committed: %ld base: %#x_%.8x size: %#x_%.8x\n",
- info->port->id, i,
- FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl),
- readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)),
- readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)),
- readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)),
- readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i)));
- if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
- return false;
- }
+ ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+ if (ctrl & CXL_HDM_DECODER_ENABLE)
+ return false;
return true;
}
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index e7a6452bf544..12386d912705 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1301,7 +1301,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
* Require an endpoint to be safe otherwise the driver can not
* be sure that the device is unmapped.
*/
- if (endpoint && cxl_num_decoders_committed(endpoint) == 0)
+ if (cxlmd->dev.driver && cxl_num_decoders_committed(endpoint) == 0)
return __cxl_mem_sanitize(mds, cmd);
return -EBUSY;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 0c5957d1d329..c5aacd7054f1 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -552,10 +552,13 @@ static void cxl_port_release(struct device *dev)
xa_destroy(&port->dports);
xa_destroy(&port->regions);
ida_free(&cxl_port_ida, port->id);
- if (is_cxl_root(port))
+
+ if (is_cxl_root(port)) {
kfree(to_cxl_root(port));
- else
+ } else {
+ put_device(dev->parent);
kfree(port);
+ }
}
static ssize_t decoders_committed_show(struct device *dev,
@@ -707,6 +710,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev,
struct cxl_port *iter;
dev->parent = &parent_port->dev;
+ get_device(dev->parent);
port->depth = parent_port->depth + 1;
port->parent_dport = parent_dport;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 42874948b589..c37ae0b28bbb 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3854,8 +3854,10 @@ static int __construct_region(struct cxl_region *cxlr,
}
rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group);
- if (rc)
+ if (rc) {
+ kfree(res);
return rc;
+ }
rc = insert_resource(cxlrd->res, res);
if (rc) {
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 082ec0f1c3a0..261dff7ced9f 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -554,7 +554,7 @@ static __exit void cxl_pmem_exit(void)
MODULE_DESCRIPTION("CXL PMEM: Persistent Memory Support");
MODULE_LICENSE("GPL v2");
-module_init(cxl_pmem_init);
+subsys_initcall(cxl_pmem_init);
module_exit(cxl_pmem_exit);
MODULE_IMPORT_NS("CXL");
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
index e7d698b352d3..5397dbda4f72 100644
--- a/drivers/dma/dw-edma/dw-edma-core.c
+++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -844,6 +844,7 @@ static int dw_edma_irq_request(struct dw_edma *dw,
{
struct dw_edma_chip *chip = dw->chip;
struct device *dev = dw->chip->dev;
+ struct msi_desc *msi_desc;
u32 wr_mask = 1;
u32 rd_mask = 1;
int i, err = 0;
@@ -895,9 +896,12 @@ static int dw_edma_irq_request(struct dw_edma *dw,
&dw->irq[i]);
if (err)
goto err_irq_free;
-
- if (irq_get_msi_desc(irq))
+ msi_desc = irq_get_msi_desc(irq);
+ if (msi_desc) {
get_cached_msi_msg(irq, &dw->irq[i].msi);
+ if (!msi_desc->pci.msi_attrib.is_msix)
+ dw->irq[i].msi.data = dw->irq[0].msi.data + i;
+ }
}
dw->nr_irqs = i;
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c
index e3f8db4fe909..ce8f7254bab2 100644
--- a/drivers/dma/dw-edma/dw-hdma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c
@@ -252,10 +252,10 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
lower_32_bits(chunk->ll_region.paddr));
SET_CH_32(dw, chan->dir, chan->id, llp.msb,
upper_32_bits(chunk->ll_region.paddr));
+ /* Set consumer cycle */
+ SET_CH_32(dw, chan->dir, chan->id, cycle_sync,
+ HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT);
}
- /* Set consumer cycle */
- SET_CH_32(dw, chan->dir, chan->id, cycle_sync,
- HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT);
dw_hdma_v0_sync_ll_data(chunk);
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index dbcdd1e68319..b596baa0a182 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -317,10 +317,8 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec,
return NULL;
i = fsl_chan - fsl_edma->chans;
- fsl_chan->priority = dma_spec->args[1];
- fsl_chan->is_rxchan = dma_spec->args[2] & FSL_EDMA_RX;
- fsl_chan->is_remote = dma_spec->args[2] & FSL_EDMA_REMOTE;
- fsl_chan->is_multi_fifo = dma_spec->args[2] & FSL_EDMA_MULTI_FIFO;
+ if (!b_chmux && i != dma_spec->args[0])
+ continue;
if ((dma_spec->args[2] & FSL_EDMA_EVEN_CH) && (i & 0x1))
continue;
@@ -328,17 +326,15 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec,
if ((dma_spec->args[2] & FSL_EDMA_ODD_CH) && !(i & 0x1))
continue;
- if (!b_chmux && i == dma_spec->args[0]) {
- chan = dma_get_slave_channel(chan);
- chan->device->privatecnt++;
- return chan;
- } else if (b_chmux && !fsl_chan->srcid) {
- /* if controller support channel mux, choose a free channel */
- chan = dma_get_slave_channel(chan);
- chan->device->privatecnt++;
- fsl_chan->srcid = dma_spec->args[0];
- return chan;
- }
+ fsl_chan->srcid = dma_spec->args[0];
+ fsl_chan->priority = dma_spec->args[1];
+ fsl_chan->is_rxchan = dma_spec->args[2] & FSL_EDMA_RX;
+ fsl_chan->is_remote = dma_spec->args[2] & FSL_EDMA_REMOTE;
+ fsl_chan->is_multi_fifo = dma_spec->args[2] & FSL_EDMA_MULTI_FIFO;
+
+ chan = dma_get_slave_channel(chan);
+ chan->device->privatecnt++;
+ return chan;
}
return NULL;
}
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index c37d233535f9..0366c7cf3502 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -158,11 +158,7 @@ static const struct device_type idxd_cdev_file_type = {
static void idxd_cdev_dev_release(struct device *dev)
{
struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
- struct idxd_cdev_context *cdev_ctx;
- struct idxd_wq *wq = idxd_cdev->wq;
- cdev_ctx = &ictx[wq->idxd->data->type];
- ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor);
kfree(idxd_cdev);
}
@@ -582,11 +578,15 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
void idxd_wq_del_cdev(struct idxd_wq *wq)
{
+ struct idxd_cdev_context *cdev_ctx;
struct idxd_cdev *idxd_cdev;
idxd_cdev = wq->idxd_cdev;
wq->idxd_cdev = NULL;
cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
+
+ cdev_ctx = &ictx[wq->idxd->data->type];
+ ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor);
put_device(cdev_dev(idxd_cdev));
}
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index c26128529ff4..131138483b87 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -175,6 +175,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq)
free_descs(wq);
dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
sbitmap_queue_free(&wq->sbq);
+ wq->type = IDXD_WQT_NONE;
}
EXPORT_SYMBOL_NS_GPL(idxd_wq_free_resources, "IDXD");
@@ -382,7 +383,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
lockdep_assert_held(&wq->wq_lock);
wq->state = IDXD_WQ_DISABLED;
memset(wq->wqcfg, 0, idxd->wqcfg_size);
- wq->type = IDXD_WQT_NONE;
wq->threshold = 0;
wq->priority = 0;
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
@@ -831,8 +831,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
struct device *dev = &idxd->pdev->dev;
struct idxd_evl *evl = idxd->evl;
- gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
- if (!gencfg.evl_en)
+ if (!evl)
return;
mutex_lock(&evl->lock);
@@ -1125,7 +1124,11 @@ int idxd_device_config(struct idxd_device *idxd)
{
int rc;
- lockdep_assert_held(&idxd->dev_lock);
+ guard(spinlock)(&idxd->dev_lock);
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return 0;
+
rc = idxd_wqs_setup(idxd);
if (rc < 0)
return rc;
@@ -1332,6 +1335,11 @@ void idxd_wq_free_irq(struct idxd_wq *wq)
free_irq(ie->vector, ie);
idxd_flush_pending_descs(ie);
+
+ /* The interrupt might have been already released by FLR */
+ if (ie->int_handle == INVALID_INT_HANDLE)
+ return;
+
if (idxd->request_int_handles)
idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
idxd_device_clear_perm_entry(idxd, ie);
@@ -1340,6 +1348,23 @@ void idxd_wq_free_irq(struct idxd_wq *wq)
ie->pasid = IOMMU_PASID_INVALID;
}
+void idxd_wq_flush_descs(struct idxd_wq *wq)
+{
+ struct idxd_irq_entry *ie = &wq->ie;
+ struct idxd_device *idxd = wq->idxd;
+
+ guard(mutex)(&wq->wq_lock);
+
+ if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL)
+ return;
+
+ idxd_flush_pending_descs(ie);
+ if (idxd->request_int_handles)
+ idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
+ idxd_device_clear_perm_entry(idxd, ie);
+ ie->int_handle = INVALID_INT_HANDLE;
+}
+
int idxd_wq_request_irq(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
@@ -1454,11 +1479,7 @@ int idxd_drv_enable_wq(struct idxd_wq *wq)
}
}
- rc = 0;
- spin_lock(&idxd->dev_lock);
- if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
- rc = idxd_device_config(idxd);
- spin_unlock(&idxd->dev_lock);
+ rc = idxd_device_config(idxd);
if (rc < 0) {
dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc);
goto err;
@@ -1533,7 +1554,6 @@ void idxd_drv_disable_wq(struct idxd_wq *wq)
idxd_wq_reset(wq);
idxd_wq_free_resources(wq);
percpu_ref_exit(&wq->wq_active);
- wq->type = IDXD_WQT_NONE;
wq->client_count = 0;
}
EXPORT_SYMBOL_NS_GPL(idxd_drv_disable_wq, "IDXD");
@@ -1554,10 +1574,7 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
}
/* Device configuration */
- spin_lock(&idxd->dev_lock);
- if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
- rc = idxd_device_config(idxd);
- spin_unlock(&idxd->dev_lock);
+ rc = idxd_device_config(idxd);
if (rc < 0)
return -ENXIO;
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index dbecd699237e..9937b671f637 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -194,6 +194,22 @@ static void idxd_dma_release(struct dma_device *device)
kfree(idxd_dma);
}
+static int idxd_dma_terminate_all(struct dma_chan *c)
+{
+ struct idxd_wq *wq = to_idxd_wq(c);
+
+ idxd_wq_flush_descs(wq);
+
+ return 0;
+}
+
+static void idxd_dma_synchronize(struct dma_chan *c)
+{
+ struct idxd_wq *wq = to_idxd_wq(c);
+
+ idxd_wq_drain(wq);
+}
+
int idxd_register_dma_device(struct idxd_device *idxd)
{
struct idxd_dma_dev *idxd_dma;
@@ -224,6 +240,8 @@ int idxd_register_dma_device(struct idxd_device *idxd)
dma->device_issue_pending = idxd_dma_issue_pending;
dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
dma->device_free_chan_resources = idxd_dma_free_chan_resources;
+ dma->device_terminate_all = idxd_dma_terminate_all;
+ dma->device_synchronize = idxd_dma_synchronize;
rc = dma_async_device_register(dma);
if (rc < 0) {
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index ea8c4daed38d..ce78b9a7c641 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -803,6 +803,7 @@ void idxd_wq_quiesce(struct idxd_wq *wq);
int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
void idxd_wq_free_irq(struct idxd_wq *wq);
int idxd_wq_request_irq(struct idxd_wq *wq);
+void idxd_wq_flush_descs(struct idxd_wq *wq);
/* submission */
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index fb80803d5b57..f1cfc7790d95 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -973,7 +973,8 @@ static void idxd_device_config_restore(struct idxd_device *idxd,
idxd->rdbuf_limit = idxd_saved->saved_idxd.rdbuf_limit;
- idxd->evl->size = saved_evl->size;
+ if (idxd->evl)
+ idxd->evl->size = saved_evl->size;
for (i = 0; i < idxd->max_groups; i++) {
struct idxd_group *saved_group, *group;
@@ -1104,12 +1105,10 @@ static void idxd_reset_done(struct pci_dev *pdev)
idxd_device_config_restore(idxd, idxd->idxd_saved);
/* Re-configure IDXD device if allowed. */
- if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
- rc = idxd_device_config(idxd);
- if (rc < 0) {
- dev_err(dev, "HALT: %s config fails\n", idxd_name);
- goto out;
- }
+ rc = idxd_device_config(idxd);
+ if (rc < 0) {
+ dev_err(dev, "HALT: %s config fails\n", idxd_name);
+ goto out;
}
/* Bind IDXD device to driver. */
@@ -1147,6 +1146,7 @@ static void idxd_reset_done(struct pci_dev *pdev)
}
out:
kfree(idxd->idxd_saved);
+ idxd->idxd_saved = NULL;
}
static const struct pci_error_handlers idxd_error_handler = {
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 7782f8c51c32..6a25e1fd0e62 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -397,6 +397,17 @@ static void idxd_device_flr(struct work_struct *work)
dev_err(&idxd->pdev->dev, "FLR failed\n");
}
+static void idxd_wqs_flush_descs(struct idxd_device *idxd)
+{
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ idxd_wq_flush_descs(wq);
+ }
+}
+
static irqreturn_t idxd_halt(struct idxd_device *idxd)
{
union gensts_reg gensts;
@@ -415,6 +426,11 @@ static irqreturn_t idxd_halt(struct idxd_device *idxd)
} else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) {
idxd->state = IDXD_DEV_HALTED;
idxd_mask_error_interrupts(idxd);
+ /* Flush all pending descriptors, and disable
+ * interrupts, they will be re-enabled when FLR
+ * concludes.
+ */
+ idxd_wqs_flush_descs(idxd);
dev_dbg(&idxd->pdev->dev,
"idxd halted, doing FLR. After FLR, configs are restored\n");
INIT_WORK(&idxd->work, idxd_device_flr);
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 6db1c5fcedc5..03217041b8b3 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -138,7 +138,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
*/
list_for_each_entry_safe(d, t, &flist, list) {
list_del_init(&d->list);
- idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true,
+ idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true,
NULL, NULL);
}
}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index cc2c83d7f710..6d251095c350 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -1836,6 +1836,7 @@ static void idxd_conf_device_release(struct device *dev)
{
struct idxd_device *idxd = confdev_to_idxd(dev);
+ destroy_workqueue(idxd->wq);
kfree(idxd->groups);
bitmap_free(idxd->wq_enable_map);
kfree(idxd->wqs);
diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c
index d84ca551b2bf..f30bdf69c740 100644
--- a/drivers/dma/sh/rz-dmac.c
+++ b/drivers/dma/sh/rz-dmac.c
@@ -10,6 +10,7 @@
*/
#include <linux/bitfield.h>
+#include <linux/cleanup.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/interrupt.h>
@@ -296,13 +297,10 @@ static void rz_dmac_disable_hw(struct rz_dmac_chan *channel)
{
struct dma_chan *chan = &channel->vc.chan;
struct rz_dmac *dmac = to_rz_dmac(chan->device);
- unsigned long flags;
dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index);
- local_irq_save(flags);
rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
- local_irq_restore(flags);
}
static void rz_dmac_set_dmars_register(struct rz_dmac *dmac, int nr, u32 dmars)
@@ -447,6 +445,7 @@ static int rz_dmac_alloc_chan_resources(struct dma_chan *chan)
if (!desc)
break;
+ /* No need to lock. This is called only for the 1st client. */
list_add_tail(&desc->node, &channel->ld_free);
channel->descs_allocated++;
}
@@ -502,18 +501,21 @@ rz_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
dev_dbg(dmac->dev, "%s channel: %d src=0x%pad dst=0x%pad len=%zu\n",
__func__, channel->index, &src, &dest, len);
- if (list_empty(&channel->ld_free))
- return NULL;
+ scoped_guard(spinlock_irqsave, &channel->vc.lock) {
+ if (list_empty(&channel->ld_free))
+ return NULL;
- desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
+ desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
- desc->type = RZ_DMAC_DESC_MEMCPY;
- desc->src = src;
- desc->dest = dest;
- desc->len = len;
- desc->direction = DMA_MEM_TO_MEM;
+ desc->type = RZ_DMAC_DESC_MEMCPY;
+ desc->src = src;
+ desc->dest = dest;
+ desc->len = len;
+ desc->direction = DMA_MEM_TO_MEM;
+
+ list_move_tail(channel->ld_free.next, &channel->ld_queue);
+ }
- list_move_tail(channel->ld_free.next, &channel->ld_queue);
return vchan_tx_prep(&channel->vc, &desc->vd, flags);
}
@@ -529,27 +531,29 @@ rz_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
int dma_length = 0;
int i = 0;
- if (list_empty(&channel->ld_free))
- return NULL;
+ scoped_guard(spinlock_irqsave, &channel->vc.lock) {
+ if (list_empty(&channel->ld_free))
+ return NULL;
- desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
+ desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
- for_each_sg(sgl, sg, sg_len, i) {
- dma_length += sg_dma_len(sg);
- }
+ for_each_sg(sgl, sg, sg_len, i)
+ dma_length += sg_dma_len(sg);
- desc->type = RZ_DMAC_DESC_SLAVE_SG;
- desc->sg = sgl;
- desc->sgcount = sg_len;
- desc->len = dma_length;
- desc->direction = direction;
+ desc->type = RZ_DMAC_DESC_SLAVE_SG;
+ desc->sg = sgl;
+ desc->sgcount = sg_len;
+ desc->len = dma_length;
+ desc->direction = direction;
- if (direction == DMA_DEV_TO_MEM)
- desc->src = channel->src_per_address;
- else
- desc->dest = channel->dst_per_address;
+ if (direction == DMA_DEV_TO_MEM)
+ desc->src = channel->src_per_address;
+ else
+ desc->dest = channel->dst_per_address;
+
+ list_move_tail(channel->ld_free.next, &channel->ld_queue);
+ }
- list_move_tail(channel->ld_free.next, &channel->ld_queue);
return vchan_tx_prep(&channel->vc, &desc->vd, flags);
}
@@ -561,8 +565,8 @@ static int rz_dmac_terminate_all(struct dma_chan *chan)
unsigned int i;
LIST_HEAD(head);
- rz_dmac_disable_hw(channel);
spin_lock_irqsave(&channel->vc.lock, flags);
+ rz_dmac_disable_hw(channel);
for (i = 0; i < DMAC_NR_LMDESC; i++)
lmdesc[i].header = 0;
@@ -699,7 +703,9 @@ static void rz_dmac_irq_handle_channel(struct rz_dmac_chan *channel)
if (chstat & CHSTAT_ER) {
dev_err(dmac->dev, "DMAC err CHSTAT_%d = %08X\n",
channel->index, chstat);
- rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+
+ scoped_guard(spinlock_irqsave, &channel->vc.lock)
+ rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
goto done;
}
diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c
index d02a4dac2291..782a55edc55b 100644
--- a/drivers/dma/xilinx/xdma.c
+++ b/drivers/dma/xilinx/xdma.c
@@ -1234,8 +1234,8 @@ static int xdma_probe(struct platform_device *pdev)
xdev->rmap = devm_regmap_init_mmio(&pdev->dev, reg_base,
&xdma_regmap_config);
- if (!xdev->rmap) {
- xdma_err(xdev, "config regmap failed: %d", ret);
+ if (IS_ERR(xdev->rmap)) {
+ xdma_err(xdev, "config regmap failed: %pe", xdev->rmap);
goto failed;
}
INIT_LIST_HEAD(&xdev->dma_dev.channels);
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index b53292e02448..e3a18ee42aa2 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -997,16 +997,16 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan,
struct xilinx_cdma_tx_segment,
node);
cdma_hw = &cdma_seg->hw;
- residue += (cdma_hw->control - cdma_hw->status) &
- chan->xdev->max_buffer_len;
+ residue += (cdma_hw->control & chan->xdev->max_buffer_len) -
+ (cdma_hw->status & chan->xdev->max_buffer_len);
} else if (chan->xdev->dma_config->dmatype ==
XDMA_TYPE_AXIDMA) {
axidma_seg = list_entry(entry,
struct xilinx_axidma_tx_segment,
node);
axidma_hw = &axidma_seg->hw;
- residue += (axidma_hw->control - axidma_hw->status) &
- chan->xdev->max_buffer_len;
+ residue += (axidma_hw->control & chan->xdev->max_buffer_len) -
+ (axidma_hw->status & chan->xdev->max_buffer_len);
} else {
aximcdma_seg =
list_entry(entry,
@@ -1014,8 +1014,8 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan,
node);
aximcdma_hw = &aximcdma_seg->hw;
residue +=
- (aximcdma_hw->control - aximcdma_hw->status) &
- chan->xdev->max_buffer_len;
+ (aximcdma_hw->control & chan->xdev->max_buffer_len) -
+ (aximcdma_hw->status & chan->xdev->max_buffer_len);
}
}
@@ -1235,14 +1235,6 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan)
dma_cookie_init(dchan);
- if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
- /* For AXI DMA resetting once channel will reset the
- * other channel as well so enable the interrupts here.
- */
- dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
- XILINX_DMA_DMAXR_ALL_IRQ_MASK);
- }
-
if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg)
dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
XILINX_CDMA_CR_SGMODE);
@@ -1564,8 +1556,29 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
if (chan->err)
return;
- if (list_empty(&chan->pending_list))
+ if (list_empty(&chan->pending_list)) {
+ if (chan->cyclic) {
+ struct xilinx_dma_tx_descriptor *desc;
+ struct list_head *entry;
+
+ desc = list_last_entry(&chan->done_list,
+ struct xilinx_dma_tx_descriptor, node);
+ list_for_each(entry, &desc->segments) {
+ struct xilinx_axidma_tx_segment *axidma_seg;
+ struct xilinx_axidma_desc_hw *axidma_hw;
+ axidma_seg = list_entry(entry,
+ struct xilinx_axidma_tx_segment,
+ node);
+ axidma_hw = &axidma_seg->hw;
+ axidma_hw->status = 0;
+ }
+
+ list_splice_tail_init(&chan->done_list, &chan->active_list);
+ chan->desc_pendingcount = 0;
+ chan->idle = false;
+ }
return;
+ }
if (!chan->idle)
return;
@@ -1591,6 +1604,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
head_desc->async_tx.phys);
reg &= ~XILINX_DMA_CR_DELAY_MAX;
reg |= chan->irq_delay << XILINX_DMA_CR_DELAY_SHIFT;
+ reg |= XILINX_DMA_DMAXR_ALL_IRQ_MASK;
dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
xilinx_dma_start(chan);
@@ -3024,7 +3038,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
return -EINVAL;
}
- xdev->common.directions |= chan->direction;
+ xdev->common.directions |= BIT(chan->direction);
/* Request the interrupt */
chan->irq = of_irq_get(node, chan->tdest);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 40c22438b1d2..4f27c75abedb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -692,9 +692,9 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
goto err_ib_sched;
}
- /* Drop the initial kref_init count (see drm_sched_main as example) */
- dma_fence_put(f);
ret = dma_fence_wait(f, false);
+ /* Drop the returned fence reference after the wait completes */
+ dma_fence_put(f);
err_ib_sched:
amdgpu_job_free(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d8296dfc5e8a..6d8531f9b882 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4207,7 +4207,8 @@ fail:
static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
{
- char *input = amdgpu_lockup_timeout;
+ char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
+ char *input = buf;
char *timeout_setting = NULL;
int index = 0;
long timeout;
@@ -4217,9 +4218,17 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
adev->video_timeout = msecs_to_jiffies(2000);
- if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
+ if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
return 0;
+ /*
+ * strsep() destructively modifies its input by replacing delimiters
+ * with '\0'. Use a stack copy so the global module parameter buffer
+ * remains intact for multi-GPU systems where this function is called
+ * once per device.
+ */
+ strscpy(buf, amdgpu_lockup_timeout, sizeof(buf));
+
while ((timeout_setting = strsep(&input, ",")) &&
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
ret = kstrtol(timeout_setting, 0, &timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 64c519cd7395..d88523568b62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -35,10 +35,13 @@
* PASIDs are global address space identifiers that can be shared
* between the GPU, an IOMMU and the driver. VMs on different devices
* may use the same PASID if they share the same address
- * space. Therefore PASIDs are allocated using a global IDA. VMs are
- * looked up from the PASID per amdgpu_device.
+ * space. Therefore PASIDs are allocated using IDR cyclic allocator
+ * (similar to kernel PID allocation) which naturally delays reuse.
+ * VMs are looked up from the PASID per amdgpu_device.
*/
-static DEFINE_IDA(amdgpu_pasid_ida);
+
+static DEFINE_IDR(amdgpu_pasid_idr);
+static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
/* Helper to free pasid from a fence callback */
struct amdgpu_pasid_cb {
@@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
* amdgpu_pasid_alloc - Allocate a PASID
* @bits: Maximum width of the PASID in bits, must be at least 1
*
- * Allocates a PASID of the given width while keeping smaller PASIDs
- * available if possible.
+ * Uses kernel's IDR cyclic allocator (same as PID allocation).
+ * Allocates sequentially with automatic wrap-around.
*
* Returns a positive integer on success. Returns %-EINVAL if bits==0.
* Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
@@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
*/
int amdgpu_pasid_alloc(unsigned int bits)
{
- int pasid = -EINVAL;
+ int pasid;
- for (bits = min(bits, 31U); bits > 0; bits--) {
- pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
- (1U << bits) - 1, GFP_KERNEL);
- if (pasid != -ENOSPC)
- break;
- }
+ if (bits == 0)
+ return -EINVAL;
+
+ spin_lock(&amdgpu_pasid_idr_lock);
+ pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
+ 1U << bits, GFP_KERNEL);
+ spin_unlock(&amdgpu_pasid_idr_lock);
if (pasid >= 0)
trace_amdgpu_pasid_allocated(pasid);
@@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
void amdgpu_pasid_free(u32 pasid)
{
trace_amdgpu_pasid_freed(pasid);
- ida_free(&amdgpu_pasid_ida, pasid);
+
+ spin_lock(&amdgpu_pasid_idr_lock);
+ idr_remove(&amdgpu_pasid_idr, pasid);
+ spin_unlock(&amdgpu_pasid_idr_lock);
}
static void amdgpu_pasid_free_cb(struct dma_fence *fence,
@@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
}
}
}
+
+/**
+ * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
+ *
+ * Cleanup the IDR allocator.
+ */
+void amdgpu_pasid_mgr_cleanup(void)
+{
+ spin_lock(&amdgpu_pasid_idr_lock);
+ idr_destroy(&amdgpu_pasid_idr);
+ spin_unlock(&amdgpu_pasid_idr_lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index b3649cd3af56..a57919478d3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
void amdgpu_pasid_free(u32 pasid);
void amdgpu_pasid_free_delayed(struct dma_resv *resv,
u32 pasid);
+void amdgpu_pasid_mgr_cleanup(void);
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c60cbce356cf..a677e38a493b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2898,6 +2898,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
xa_destroy(&adev->vm_manager.pasids);
amdgpu_vmid_mgr_fini(adev);
+ amdgpu_pasid_mgr_cleanup();
}
/**
@@ -2973,14 +2974,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
if (!root)
return false;
- addr /= AMDGPU_GPU_PAGE_SIZE;
-
if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
- node_id, addr, ts, write_fault)) {
+ node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
amdgpu_bo_unref(&root);
return true;
}
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 09dabb3b3297..462a32abf720 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -3170,11 +3170,11 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v
struct kfd_process *process;
int ret;
- /* Each FD owns only one kfd_process */
- if (p->context_id != KFD_CONTEXT_ID_PRIMARY)
+ if (!filep->private_data || !p)
return -EINVAL;
- if (!filep->private_data || !p)
+ /* Each FD owns only one kfd_process */
+ if (p->context_id != KFD_CONTEXT_ID_PRIMARY)
return -EINVAL;
mutex_lock(&kfd_processes_mutex);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 085cc98bd875..2328c1aa0ead 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3909,8 +3909,9 @@ void amdgpu_dm_update_connector_after_detect(
aconnector->dc_sink = sink;
dc_sink_retain(aconnector->dc_sink);
+ drm_edid_free(aconnector->drm_edid);
+ aconnector->drm_edid = NULL;
if (sink->dc_edid.length == 0) {
- aconnector->drm_edid = NULL;
hdmi_cec_unset_edid(aconnector);
if (aconnector->dc_link->aux_mode) {
drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
@@ -5422,7 +5423,7 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm,
caps = &dm->backlight_caps[aconnector->bl_idx];
/* Only offer ABM property when non-OLED and user didn't turn off by module parameter */
- if (!caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0)
+ if (caps->ext_caps && !caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0)
drm_object_attach_property(&aconnector->base.base,
dm->adev->mode_info.abm_level_property,
ABM_SYSFS_CONTROL);
@@ -12524,6 +12525,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
if (dc_resource_is_dsc_encoding_supported(dc)) {
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ dm_new_crtc_state->mode_changed_independent_from_dsc = new_crtc_state->mode_changed;
+ }
+
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
if (drm_atomic_crtc_needs_modeset(new_crtc_state)) {
ret = add_affected_mst_dsc_crtcs(state, crtc);
if (ret) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 800813671748..d15812d51d72 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -984,6 +984,7 @@ struct dm_crtc_state {
bool freesync_vrr_info_changed;
+ bool mode_changed_independent_from_dsc;
bool dsc_force_changed;
bool vrr_supported;
struct mod_freesync_config freesync_config;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 7be50e8c0636..5d8c4c7020b1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1744,9 +1744,11 @@ int pre_validate_dsc(struct drm_atomic_state *state,
int ind = find_crtc_index_in_state_by_stream(state, stream);
if (ind >= 0) {
+ struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(state->crtcs[ind].new_state);
+
DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n",
__func__, __LINE__, stream);
- state->crtcs[ind].new_state->mode_changed = 0;
+ dm_new_crtc_state->base.mode_changed = dm_new_crtc_state->mode_changed_independent_from_dsc;
}
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
index 92c123aca0c9..fdcf8db6be50 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
@@ -650,9 +650,6 @@ static struct link_encoder *dce100_link_encoder_create(
return &enc110->base;
}
- if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
- return NULL;
-
link_regs_id =
map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
@@ -661,7 +658,8 @@ static struct link_encoder *dce100_link_encoder_create(
&link_enc_feature,
&link_enc_regs[link_regs_id],
&link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source]);
+ enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
+ NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]);
return &enc110->base;
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
index 95852d277c22..ab71f645c90e 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
@@ -671,7 +671,7 @@ static struct link_encoder *dce110_link_encoder_create(
kzalloc_obj(struct dce110_link_encoder);
int link_regs_id;
- if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
+ if (!enc110)
return NULL;
link_regs_id =
@@ -682,7 +682,8 @@ static struct link_encoder *dce110_link_encoder_create(
&link_enc_feature,
&link_enc_regs[link_regs_id],
&link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source]);
+ enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
+ NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]);
return &enc110->base;
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
index 58c6a00397cf..b7051bfd4326 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
@@ -632,7 +632,7 @@ static struct link_encoder *dce112_link_encoder_create(
kzalloc_obj(struct dce110_link_encoder);
int link_regs_id;
- if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
+ if (!enc110)
return NULL;
link_regs_id =
@@ -643,7 +643,8 @@ static struct link_encoder *dce112_link_encoder_create(
&link_enc_feature,
&link_enc_regs[link_regs_id],
&link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source]);
+ enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
+ NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]);
return &enc110->base;
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
index 71d76b021375..7ee70f7b3aa7 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
@@ -716,7 +716,7 @@ static struct link_encoder *dce120_link_encoder_create(
kzalloc_obj(struct dce110_link_encoder);
int link_regs_id;
- if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
+ if (!enc110)
return NULL;
link_regs_id =
@@ -727,7 +727,8 @@ static struct link_encoder *dce120_link_encoder_create(
&link_enc_feature,
&link_enc_regs[link_regs_id],
&link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source]);
+ enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
+ NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]);
return &enc110->base;
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
index c27645708286..3d52973dd7f2 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
@@ -746,18 +746,16 @@ static struct link_encoder *dce60_link_encoder_create(
return &enc110->base;
}
- if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
- return NULL;
-
link_regs_id =
map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
dce60_link_encoder_construct(enc110,
- enc_init_data,
- &link_enc_feature,
- &link_enc_regs[link_regs_id],
- &link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source]);
+ enc_init_data,
+ &link_enc_feature,
+ &link_enc_regs[link_regs_id],
+ &link_enc_aux_regs[enc_init_data->channel - 1],
+ enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
+ NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]);
return &enc110->base;
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
index d66d8ac6d897..89927727a0d9 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
@@ -752,9 +752,6 @@ static struct link_encoder *dce80_link_encoder_create(
return &enc110->base;
}
- if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
- return NULL;
-
link_regs_id =
map_transmitter_id_to_phy_instance(enc_init_data->transmitter);
@@ -763,7 +760,8 @@ static struct link_encoder *dce80_link_encoder_create(
&link_enc_feature,
&link_enc_regs[link_regs_id],
&link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source]);
+ enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
+ NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]);
return &enc110->base;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index a8d63d4d1f6e..554f616328c3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -59,6 +59,10 @@
#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c))
+static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu,
+ int od_feature_bit,
+ int32_t *min, int32_t *max);
+
static const struct smu_feature_bits smu_v13_0_0_dpm_features = {
.bits = {
SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT),
@@ -1043,8 +1047,35 @@ static bool smu_v13_0_0_is_od_feature_supported(struct smu_context *smu,
PPTable_t *pptable = smu->smu_table.driver_pptable;
const OverDriveLimits_t * const overdrive_upperlimits =
&pptable->SkuTable.OverDriveLimitsBasicMax;
+ int32_t min_value, max_value;
+ bool feature_enabled;
- return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit);
+ switch (od_feature_bit) {
+ case PP_OD_FEATURE_FAN_CURVE_BIT:
+ feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit));
+ if (feature_enabled) {
+ smu_v13_0_0_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP,
+ &min_value, &max_value);
+ if (!min_value && !max_value) {
+ feature_enabled = false;
+ goto out;
+ }
+
+ smu_v13_0_0_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM,
+ &min_value, &max_value);
+ if (!min_value && !max_value) {
+ feature_enabled = false;
+ goto out;
+ }
+ }
+ break;
+ default:
+ feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit));
+ break;
+ }
+
+out:
+ return feature_enabled;
}
static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 896b51c8a9a7..870bcc86fd79 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1391,7 +1391,7 @@ static int smu_v13_0_6_emit_clk_levels(struct smu_context *smu,
break;
case SMU_OD_MCLK:
if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SET_UCLK_MAX)))
- return 0;
+ return -EOPNOTSUPP;
size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK");
size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n",
@@ -2122,6 +2122,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu,
{
struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
+ struct smu_dpm_table *uclk_table = &dpm_context->dpm_tables.uclk_table;
struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
uint32_t min_clk;
uint32_t max_clk;
@@ -2221,14 +2222,16 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu,
if (ret)
return ret;
- min_clk = SMU_DPM_TABLE_MIN(
- &dpm_context->dpm_tables.uclk_table);
- max_clk = SMU_DPM_TABLE_MAX(
- &dpm_context->dpm_tables.uclk_table);
- ret = smu_v13_0_6_set_soft_freq_limited_range(
- smu, SMU_UCLK, min_clk, max_clk, false);
- if (ret)
- return ret;
+ if (SMU_DPM_TABLE_MAX(uclk_table) !=
+ pstate_table->uclk_pstate.curr.max) {
+ min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.uclk_table);
+ max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.uclk_table);
+ ret = smu_v13_0_6_set_soft_freq_limited_range(smu,
+ SMU_UCLK, min_clk,
+ max_clk, false);
+ if (ret)
+ return ret;
+ }
smu_v13_0_reset_custom_level(smu);
}
break;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 5500a0f12f0e..f331e87858c9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -59,6 +59,10 @@
#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c))
+static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu,
+ int od_feature_bit,
+ int32_t *min, int32_t *max);
+
static const struct smu_feature_bits smu_v13_0_7_dpm_features = {
.bits = {
SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT),
@@ -1053,8 +1057,35 @@ static bool smu_v13_0_7_is_od_feature_supported(struct smu_context *smu,
PPTable_t *pptable = smu->smu_table.driver_pptable;
const OverDriveLimits_t * const overdrive_upperlimits =
&pptable->SkuTable.OverDriveLimitsBasicMax;
+ int32_t min_value, max_value;
+ bool feature_enabled;
- return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit);
+ switch (od_feature_bit) {
+ case PP_OD_FEATURE_FAN_CURVE_BIT:
+ feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit));
+ if (feature_enabled) {
+ smu_v13_0_7_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP,
+ &min_value, &max_value);
+ if (!min_value && !max_value) {
+ feature_enabled = false;
+ goto out;
+ }
+
+ smu_v13_0_7_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM,
+ &min_value, &max_value);
+ if (!min_value && !max_value) {
+ feature_enabled = false;
+ goto out;
+ }
+ }
+ break;
+ default:
+ feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit));
+ break;
+ }
+
+out:
+ return feature_enabled;
}
static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 73762d9b5969..c3ebfac062a7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -56,6 +56,10 @@
#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c))
+static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu,
+ int od_feature_bit,
+ int32_t *min, int32_t *max);
+
static const struct smu_feature_bits smu_v14_0_2_dpm_features = {
.bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT),
SMU_FEATURE_BIT_INIT(FEATURE_DPM_UCLK_BIT),
@@ -922,8 +926,35 @@ static bool smu_v14_0_2_is_od_feature_supported(struct smu_context *smu,
PPTable_t *pptable = smu->smu_table.driver_pptable;
const OverDriveLimits_t * const overdrive_upperlimits =
&pptable->SkuTable.OverDriveLimitsBasicMax;
+ int32_t min_value, max_value;
+ bool feature_enabled;
- return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit);
+ switch (od_feature_bit) {
+ case PP_OD_FEATURE_FAN_CURVE_BIT:
+ feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit));
+ if (feature_enabled) {
+ smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP,
+ &min_value, &max_value);
+ if (!min_value && !max_value) {
+ feature_enabled = false;
+ goto out;
+ }
+
+ smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM,
+ &min_value, &max_value);
+ if (!min_value && !max_value) {
+ feature_enabled = false;
+ goto out;
+ }
+ }
+ break;
+ default:
+ feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit));
+ break;
+ }
+
+out:
+ return feature_enabled;
}
static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu,
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 7b5a49935ae4..c549293b5bb6 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -550,27 +550,27 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create);
-static bool drm_gem_shmem_try_map_pmd(struct vm_fault *vmf, unsigned long addr,
- struct page *page)
+static vm_fault_t try_insert_pfn(struct vm_fault *vmf, unsigned int order,
+ unsigned long pfn)
{
+ if (!order) {
+ return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
- unsigned long pfn = page_to_pfn(page);
- unsigned long paddr = pfn << PAGE_SHIFT;
- bool aligned = (addr & ~PMD_MASK) == (paddr & ~PMD_MASK);
-
- if (aligned &&
- pmd_none(*vmf->pmd) &&
- folio_test_pmd_mappable(page_folio(page))) {
- pfn &= PMD_MASK >> PAGE_SHIFT;
- if (vmf_insert_pfn_pmd(vmf, pfn, false) == VM_FAULT_NOPAGE)
- return true;
- }
+ } else if (order == PMD_ORDER) {
+ unsigned long paddr = pfn << PAGE_SHIFT;
+ bool aligned = (vmf->address & ~PMD_MASK) == (paddr & ~PMD_MASK);
+
+ if (aligned &&
+ folio_test_pmd_mappable(page_folio(pfn_to_page(pfn)))) {
+ pfn &= PMD_MASK >> PAGE_SHIFT;
+ return vmf_insert_pfn_pmd(vmf, pfn, false);
+ }
#endif
-
- return false;
+ }
+ return VM_FAULT_FALLBACK;
}
-static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
+static vm_fault_t drm_gem_shmem_any_fault(struct vm_fault *vmf, unsigned int order)
{
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
@@ -581,6 +581,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
pgoff_t page_offset;
unsigned long pfn;
+ if (order && order != PMD_ORDER)
+ return VM_FAULT_FALLBACK;
+
/* Offset to faulty address in the VMA. */
page_offset = vmf->pgoff - vma->vm_pgoff;
@@ -593,13 +596,8 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
goto out;
}
- if (drm_gem_shmem_try_map_pmd(vmf, vmf->address, pages[page_offset])) {
- ret = VM_FAULT_NOPAGE;
- goto out;
- }
-
pfn = page_to_pfn(pages[page_offset]);
- ret = vmf_insert_pfn(vma, vmf->address, pfn);
+ ret = try_insert_pfn(vmf, order, pfn);
out:
dma_resv_unlock(shmem->base.resv);
@@ -607,6 +605,11 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
return ret;
}
+static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
+{
+ return drm_gem_shmem_any_fault(vmf, 0);
+}
+
static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
{
struct drm_gem_object *obj = vma->vm_private_data;
@@ -643,6 +646,9 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma)
const struct vm_operations_struct drm_gem_shmem_vm_ops = {
.fault = drm_gem_shmem_fault,
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+ .huge_fault = drm_gem_shmem_any_fault,
+#endif
.open = drm_gem_shmem_vm_open,
.close = drm_gem_shmem_vm_close,
};
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 250734dee928..8d9fd1917c6e 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -602,7 +602,7 @@ int drm_syncobj_get_handle(struct drm_file *file_private,
drm_syncobj_get(syncobj);
ret = xa_alloc(&file_private->syncobj_xa, handle, syncobj, xa_limit_32b,
- GFP_NOWAIT);
+ GFP_KERNEL);
if (ret)
drm_syncobj_put(syncobj);
@@ -716,7 +716,7 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
drm_syncobj_get(syncobj);
ret = xa_alloc(&file_private->syncobj_xa, handle, syncobj, xa_limit_32b,
- GFP_NOWAIT);
+ GFP_KERNEL);
if (ret)
drm_syncobj_put(syncobj);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index c4246481fc2f..0f82bf771a92 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -4602,6 +4602,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state,
struct intel_crtc_state *crtc_state =
intel_atomic_get_new_crtc_state(state, crtc);
struct intel_crtc_state *saved_state;
+ int err;
saved_state = intel_crtc_state_alloc(crtc);
if (!saved_state)
@@ -4610,7 +4611,12 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state,
/* free the old crtc_state->hw members */
intel_crtc_free_hw_state(crtc_state);
- intel_dp_tunnel_atomic_clear_stream_bw(state, crtc_state);
+ err = intel_dp_tunnel_atomic_clear_stream_bw(state, crtc_state);
+ if (err) {
+ kfree(saved_state);
+
+ return err;
+ }
/* FIXME: before the switch to atomic started, a new pipe_config was
* kzalloc'd. Code that depends on any field being zero should be
diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c
index 83865c02d477..55b423fd6b6f 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c
@@ -621,19 +621,27 @@ int intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state,
*
* Clear any DP tunnel stream BW requirement set by
* intel_dp_tunnel_atomic_compute_stream_bw().
+ *
+ * Returns 0 in case of success, a negative error code otherwise.
*/
-void intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state,
- struct intel_crtc_state *crtc_state)
+int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state,
+ struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ int err;
if (!crtc_state->dp_tunnel_ref.tunnel)
- return;
+ return 0;
+
+ err = drm_dp_tunnel_atomic_set_stream_bw(&state->base,
+ crtc_state->dp_tunnel_ref.tunnel,
+ crtc->pipe, 0);
+ if (err)
+ return err;
- drm_dp_tunnel_atomic_set_stream_bw(&state->base,
- crtc_state->dp_tunnel_ref.tunnel,
- crtc->pipe, 0);
drm_dp_tunnel_ref_put(&crtc_state->dp_tunnel_ref);
+
+ return 0;
}
/**
diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h
index 7f0f720e8dca..10ab9eebcef6 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h
@@ -40,8 +40,8 @@ int intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state,
struct intel_dp *intel_dp,
const struct intel_connector *connector,
struct intel_crtc_state *crtc_state);
-void intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state,
- struct intel_crtc_state *crtc_state);
+int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state,
+ struct intel_crtc_state *crtc_state);
int intel_dp_tunnel_atomic_add_state_for_crtc(struct intel_atomic_state *state,
struct intel_crtc *crtc);
@@ -88,9 +88,12 @@ intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state,
return 0;
}
-static inline void
+static inline int
intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state,
- struct intel_crtc_state *crtc_state) {}
+ struct intel_crtc_state *crtc_state)
+{
+ return 0;
+}
static inline int
intel_dp_tunnel_atomic_add_state_for_crtc(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c
index a7bce0c6a17e..264e6843bff1 100644
--- a/drivers/gpu/drm/i915/display/intel_gmbus.c
+++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
@@ -496,8 +496,10 @@ gmbus_xfer_read_chunk(struct intel_display *display,
val = intel_de_read_fw(display, GMBUS3(display));
do {
- if (extra_byte_added && len == 1)
+ if (extra_byte_added && len == 1) {
+ len--;
break;
+ }
*buf++ = val & 0xff;
val >>= 8;
diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c
index e06a0618b4c6..076b9b356481 100644
--- a/drivers/gpu/drm/i915/display/intel_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_plane.c
@@ -436,11 +436,16 @@ void intel_plane_copy_hw_state(struct intel_plane_state *plane_state,
drm_framebuffer_get(plane_state->hw.fb);
}
+static void unlink_nv12_plane(struct intel_crtc_state *crtc_state,
+ struct intel_plane_state *plane_state);
+
void intel_plane_set_invisible(struct intel_crtc_state *crtc_state,
struct intel_plane_state *plane_state)
{
struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+ unlink_nv12_plane(crtc_state, plane_state);
+
crtc_state->active_planes &= ~BIT(plane->id);
crtc_state->scaled_planes &= ~BIT(plane->id);
crtc_state->nv12_planes &= ~BIT(plane->id);
@@ -1513,6 +1518,9 @@ static void unlink_nv12_plane(struct intel_crtc_state *crtc_state,
struct intel_display *display = to_intel_display(plane_state);
struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+ if (!plane_state->planar_linked_plane)
+ return;
+
plane_state->planar_linked_plane = NULL;
if (!plane_state->is_y_plane)
@@ -1550,8 +1558,7 @@ static int icl_check_nv12_planes(struct intel_atomic_state *state,
if (plane->pipe != crtc->pipe)
continue;
- if (plane_state->planar_linked_plane)
- unlink_nv12_plane(crtc_state, plane_state);
+ unlink_nv12_plane(crtc_state, plane_state);
}
if (!crtc_state->nv12_planes)
diff --git a/drivers/gpu/drm/i915/i915_wait_util.h b/drivers/gpu/drm/i915/i915_wait_util.h
index 7376898e3bf8..e1ed7921ec70 100644
--- a/drivers/gpu/drm/i915/i915_wait_util.h
+++ b/drivers/gpu/drm/i915/i915_wait_util.h
@@ -25,9 +25,9 @@
might_sleep(); \
for (;;) { \
const bool expired__ = ktime_after(ktime_get_raw(), end__); \
- OP; \
/* Guarantee COND check prior to timeout */ \
barrier(); \
+ OP; \
if (COND) { \
ret__ = 0; \
break; \
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 17c67f02016b..aaf6c9ebd319 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -1236,6 +1236,11 @@ static int mtk_dsi_probe(struct platform_device *pdev)
dsi->host.ops = &mtk_dsi_ops;
dsi->host.dev = dev;
+
+ init_waitqueue_head(&dsi->irq_wait_queue);
+
+ platform_set_drvdata(pdev, dsi);
+
ret = mipi_dsi_host_register(&dsi->host);
if (ret < 0)
return dev_err_probe(dev, ret, "Failed to register DSI host\n");
@@ -1247,10 +1252,6 @@ static int mtk_dsi_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, ret, "Failed to request DSI irq\n");
}
- init_waitqueue_head(&dsi->irq_wait_queue);
-
- platform_set_drvdata(pdev, dsi);
-
dsi->bridge.of_node = dev->of_node;
dsi->bridge.type = DRM_MODE_CONNECTOR_DSI;
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 24fc64fc832e..9d66f168ab8a 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -553,6 +553,7 @@
#define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32)
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
+#define L3_128B_256B_WRT_DIS REG_BIT(40 - 32)
#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 2d9ce2c4cb4f..713a303c9053 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1442,9 +1442,9 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
err = vma_check_userptr(vm, op->map.vma, pt_update);
break;
case DRM_GPUVA_OP_REMAP:
- if (op->remap.prev)
+ if (op->remap.prev && !op->remap.skip_prev)
err = vma_check_userptr(vm, op->remap.prev, pt_update);
- if (!err && op->remap.next)
+ if (!err && op->remap.next && !op->remap.skip_next)
err = vma_check_userptr(vm, op->remap.next, pt_update);
break;
case DRM_GPUVA_OP_UNMAP:
@@ -2198,12 +2198,12 @@ static int op_prepare(struct xe_vm *vm,
err = unbind_op_prepare(tile, pt_update_ops, old);
- if (!err && op->remap.prev) {
+ if (!err && op->remap.prev && !op->remap.skip_prev) {
err = bind_op_prepare(vm, tile, pt_update_ops,
op->remap.prev, false);
pt_update_ops->wait_vm_bookkeep = true;
}
- if (!err && op->remap.next) {
+ if (!err && op->remap.next && !op->remap.skip_next) {
err = bind_op_prepare(vm, tile, pt_update_ops,
op->remap.next, false);
pt_update_ops->wait_vm_bookkeep = true;
@@ -2428,10 +2428,10 @@ static void op_commit(struct xe_vm *vm,
unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2);
- if (op->remap.prev)
+ if (op->remap.prev && !op->remap.skip_prev)
bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
fence, fence2, false);
- if (op->remap.next)
+ if (op->remap.next && !op->remap.skip_next)
bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
fence, fence2, false);
break;
diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c
index 968f32496282..2ae9eff2a7c0 100644
--- a/drivers/gpu/drm/xe/xe_sriov_packet.c
+++ b/drivers/gpu/drm/xe/xe_sriov_packet.c
@@ -341,6 +341,8 @@ ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid,
ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data);
if (ret) {
xe_sriov_packet_free(*data);
+ *data = NULL;
+
return ret;
}
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a82e3a4fb389..ffdbab106a58 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2554,7 +2554,6 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
if (!err && op->remap.skip_prev) {
op->remap.prev->tile_present =
tile_present;
- op->remap.prev = NULL;
}
}
if (op->remap.next) {
@@ -2564,11 +2563,13 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
if (!err && op->remap.skip_next) {
op->remap.next->tile_present =
tile_present;
- op->remap.next = NULL;
}
}
- /* Adjust for partial unbind after removing VMA from VM */
+ /*
+ * Adjust for partial unbind after removing VMA from VM. In case
+ * of unwind we might need to undo this later.
+ */
if (!err) {
op->base.remap.unmap->va->va.addr = op->remap.start;
op->base.remap.unmap->va->va.range = op->remap.range;
@@ -2687,6 +2688,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
op->remap.start = xe_vma_start(old);
op->remap.range = xe_vma_size(old);
+ op->remap.old_start = op->remap.start;
+ op->remap.old_range = op->remap.range;
flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
if (op->base.remap.prev) {
@@ -2835,8 +2838,19 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
xe_svm_notifier_lock(vm);
vma->gpuva.flags &= ~XE_VMA_DESTROYED;
xe_svm_notifier_unlock(vm);
- if (post_commit)
+ if (post_commit) {
+ /*
+ * Restore the old va range, in case of the
+ * prev/next skip optimisation. Otherwise what
+ * we re-insert here could be smaller than the
+ * original range.
+ */
+ op->base.remap.unmap->va->va.addr =
+ op->remap.old_start;
+ op->base.remap.unmap->va->va.range =
+ op->remap.old_range;
xe_vm_insert_vma(vm, vma);
+ }
}
break;
}
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 437f64202f3b..e2946e311d7a 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -373,6 +373,10 @@ struct xe_vma_op_remap {
u64 start;
/** @range: range of the VMA unmap */
u64 range;
+ /** @old_start: Original start of the VMA we unmap */
+ u64 old_start;
+ /** @old_range: Original range of the VMA we unmap */
+ u64 old_range;
/** @skip_prev: skip prev rebind */
bool skip_prev;
/** @skip_next: skip next rebind */
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 462c2fa712e0..d7e309ad9aba 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -247,7 +247,8 @@ static const struct xe_rtp_entry_sr gt_was[] = {
LSN_DIM_Z_WGT_MASK,
LSN_LNI_WGT(1) | LSN_LNE_WGT(1) |
LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) |
- LSN_DIM_Z_WGT(1)))
+ LSN_DIM_Z_WGT(1)),
+ SET(LSC_CHICKEN_BIT_0_UDW, L3_128B_256B_WRT_DIS))
},
/* Xe2_HPM */
diff --git a/drivers/hwmon/adm1177.c b/drivers/hwmon/adm1177.c
index 8b2c965480e3..7888afe8dafd 100644
--- a/drivers/hwmon/adm1177.c
+++ b/drivers/hwmon/adm1177.c
@@ -10,6 +10,8 @@
#include <linux/hwmon.h>
#include <linux/i2c.h>
#include <linux/init.h>
+#include <linux/math64.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/regulator/consumer.h>
@@ -33,7 +35,7 @@
struct adm1177_state {
struct i2c_client *client;
u32 r_sense_uohm;
- u32 alert_threshold_ua;
+ u64 alert_threshold_ua;
bool vrange_high;
};
@@ -48,7 +50,7 @@ static int adm1177_write_cmd(struct adm1177_state *st, u8 cmd)
}
static int adm1177_write_alert_thr(struct adm1177_state *st,
- u32 alert_threshold_ua)
+ u64 alert_threshold_ua)
{
u64 val;
int ret;
@@ -91,8 +93,8 @@ static int adm1177_read(struct device *dev, enum hwmon_sensor_types type,
*val = div_u64((105840000ull * dummy),
4096 * st->r_sense_uohm);
return 0;
- case hwmon_curr_max_alarm:
- *val = st->alert_threshold_ua;
+ case hwmon_curr_max:
+ *val = div_u64(st->alert_threshold_ua, 1000);
return 0;
default:
return -EOPNOTSUPP;
@@ -126,9 +128,10 @@ static int adm1177_write(struct device *dev, enum hwmon_sensor_types type,
switch (type) {
case hwmon_curr:
switch (attr) {
- case hwmon_curr_max_alarm:
- adm1177_write_alert_thr(st, val);
- return 0;
+ case hwmon_curr_max:
+ val = clamp_val(val, 0,
+ div_u64(105840000ULL, st->r_sense_uohm));
+ return adm1177_write_alert_thr(st, (u64)val * 1000);
default:
return -EOPNOTSUPP;
}
@@ -156,7 +159,7 @@ static umode_t adm1177_is_visible(const void *data,
if (st->r_sense_uohm)
return 0444;
return 0;
- case hwmon_curr_max_alarm:
+ case hwmon_curr_max:
if (st->r_sense_uohm)
return 0644;
return 0;
@@ -170,7 +173,7 @@ static umode_t adm1177_is_visible(const void *data,
static const struct hwmon_channel_info * const adm1177_info[] = {
HWMON_CHANNEL_INFO(curr,
- HWMON_C_INPUT | HWMON_C_MAX_ALARM),
+ HWMON_C_INPUT | HWMON_C_MAX),
HWMON_CHANNEL_INFO(in,
HWMON_I_INPUT),
NULL
@@ -192,7 +195,8 @@ static int adm1177_probe(struct i2c_client *client)
struct device *dev = &client->dev;
struct device *hwmon_dev;
struct adm1177_state *st;
- u32 alert_threshold_ua;
+ u64 alert_threshold_ua;
+ u32 prop;
int ret;
st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL);
@@ -208,22 +212,26 @@ static int adm1177_probe(struct i2c_client *client)
if (device_property_read_u32(dev, "shunt-resistor-micro-ohms",
&st->r_sense_uohm))
st->r_sense_uohm = 0;
- if (device_property_read_u32(dev, "adi,shutdown-threshold-microamp",
- &alert_threshold_ua)) {
- if (st->r_sense_uohm)
- /*
- * set maximum default value from datasheet based on
- * shunt-resistor
- */
- alert_threshold_ua = div_u64(105840000000,
- st->r_sense_uohm);
- else
- alert_threshold_ua = 0;
+ if (!device_property_read_u32(dev, "adi,shutdown-threshold-microamp",
+ &prop)) {
+ alert_threshold_ua = prop;
+ } else if (st->r_sense_uohm) {
+ /*
+ * set maximum default value from datasheet based on
+ * shunt-resistor
+ */
+ alert_threshold_ua = div_u64(105840000000ULL,
+ st->r_sense_uohm);
+ } else {
+ alert_threshold_ua = 0;
}
st->vrange_high = device_property_read_bool(dev,
"adi,vrange-high-enable");
- if (alert_threshold_ua && st->r_sense_uohm)
- adm1177_write_alert_thr(st, alert_threshold_ua);
+ if (alert_threshold_ua && st->r_sense_uohm) {
+ ret = adm1177_write_alert_thr(st, alert_threshold_ua);
+ if (ret)
+ return ret;
+ }
ret = adm1177_write_cmd(st, ADM1177_CMD_V_CONT |
ADM1177_CMD_I_CONT |
diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c
index b2fc936851e1..457089c561b4 100644
--- a/drivers/hwmon/peci/cputemp.c
+++ b/drivers/hwmon/peci/cputemp.c
@@ -131,7 +131,7 @@ static int get_temp_target(struct peci_cputemp *priv, enum peci_temp_target_type
*val = priv->temp.target.tjmax;
break;
case crit_hyst_type:
- *val = priv->temp.target.tjmax - priv->temp.target.tcontrol;
+ *val = priv->temp.target.tcontrol;
break;
default:
ret = -EOPNOTSUPP;
@@ -319,7 +319,7 @@ static umode_t cputemp_is_visible(const void *data, enum hwmon_sensor_types type
{
const struct peci_cputemp *priv = data;
- if (channel > CPUTEMP_CHANNEL_NUMS)
+ if (channel >= CPUTEMP_CHANNEL_NUMS)
return 0;
if (channel < channel_core)
diff --git a/drivers/hwmon/pmbus/ina233.c b/drivers/hwmon/pmbus/ina233.c
index 2d8b5a5347ed..7aebd854763a 100644
--- a/drivers/hwmon/pmbus/ina233.c
+++ b/drivers/hwmon/pmbus/ina233.c
@@ -72,7 +72,8 @@ static int ina233_read_word_data(struct i2c_client *client, int page,
/* Adjust returned value to match VIN coefficients */
/* VIN: 1.25 mV VSHUNT: 2.5 uV LSB */
- ret = DIV_ROUND_CLOSEST(ret * 25, 12500);
+ ret = clamp_val(DIV_ROUND_CLOSEST((s16)ret * 25, 12500),
+ S16_MIN, S16_MAX) & 0xffff;
break;
default:
ret = -ENODATA;
diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c
index e7dac26b5be6..3e3a887aad05 100644
--- a/drivers/hwmon/pmbus/isl68137.c
+++ b/drivers/hwmon/pmbus/isl68137.c
@@ -96,7 +96,15 @@ static ssize_t isl68137_avs_enable_show_page(struct i2c_client *client,
int page,
char *buf)
{
- int val = pmbus_read_byte_data(client, page, PMBUS_OPERATION);
+ int val;
+
+ val = pmbus_lock_interruptible(client);
+ if (val)
+ return val;
+
+ val = pmbus_read_byte_data(client, page, PMBUS_OPERATION);
+
+ pmbus_unlock(client);
if (val < 0)
return val;
@@ -118,6 +126,10 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client,
op_val = result ? ISL68137_VOUT_AVS : 0;
+ rc = pmbus_lock_interruptible(client);
+ if (rc)
+ return rc;
+
/*
* Writes to VOUT setpoint over AVSBus will persist after the VRM is
* switched to PMBus control. Switching back to AVSBus control
@@ -129,17 +141,20 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client,
rc = pmbus_read_word_data(client, page, 0xff,
PMBUS_VOUT_COMMAND);
if (rc < 0)
- return rc;
+ goto unlock;
rc = pmbus_write_word_data(client, page, PMBUS_VOUT_COMMAND,
rc);
if (rc < 0)
- return rc;
+ goto unlock;
}
rc = pmbus_update_byte_data(client, page, PMBUS_OPERATION,
ISL68137_VOUT_AVS, op_val);
+unlock:
+ pmbus_unlock(client);
+
return (rc < 0) ? rc : count;
}
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index be6d05def115..572be3ebc03d 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -6,6 +6,7 @@
* Copyright (c) 2012 Guenter Roeck
*/
+#include <linux/atomic.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/dcache.h>
@@ -21,8 +22,8 @@
#include <linux/pmbus.h>
#include <linux/regulator/driver.h>
#include <linux/regulator/machine.h>
-#include <linux/of.h>
#include <linux/thermal.h>
+#include <linux/workqueue.h>
#include "pmbus.h"
/*
@@ -112,6 +113,11 @@ struct pmbus_data {
struct mutex update_lock;
+#if IS_ENABLED(CONFIG_REGULATOR)
+ atomic_t regulator_events[PMBUS_PAGES];
+ struct work_struct regulator_notify_work;
+#endif
+
bool has_status_word; /* device uses STATUS_WORD register */
int (*read_status)(struct i2c_client *client, int page);
@@ -1209,6 +1215,12 @@ static ssize_t pmbus_show_boolean(struct device *dev,
return sysfs_emit(buf, "%d\n", val);
}
+static ssize_t pmbus_show_zero(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ return sysfs_emit(buf, "0\n");
+}
+
static ssize_t pmbus_show_sensor(struct device *dev,
struct device_attribute *devattr, char *buf)
{
@@ -1407,7 +1419,7 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data,
int reg,
enum pmbus_sensor_classes class,
bool update, bool readonly,
- bool convert)
+ bool writeonly, bool convert)
{
struct pmbus_sensor *sensor;
struct device_attribute *a;
@@ -1436,7 +1448,8 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data,
sensor->data = -ENODATA;
pmbus_dev_attr_init(a, sensor->name,
readonly ? 0444 : 0644,
- pmbus_show_sensor, pmbus_set_sensor);
+ writeonly ? pmbus_show_zero : pmbus_show_sensor,
+ pmbus_set_sensor);
if (pmbus_add_attribute(data, &a->attr))
return NULL;
@@ -1495,8 +1508,10 @@ static int pmbus_add_label(struct pmbus_data *data,
struct pmbus_limit_attr {
u16 reg; /* Limit register */
u16 sbit; /* Alarm attribute status bit */
- bool update; /* True if register needs updates */
- bool low; /* True if low limit; for limits with compare functions only */
+ bool readonly:1; /* True if the attribute is read-only */
+ bool writeonly:1; /* True if the attribute is write-only */
+ bool update:1; /* True if register needs updates */
+ bool low:1; /* True if low limit; for limits with compare functions only */
const char *attr; /* Attribute name */
const char *alarm; /* Alarm attribute name */
};
@@ -1511,9 +1526,9 @@ struct pmbus_sensor_attr {
u8 nlimit; /* # of limit registers */
enum pmbus_sensor_classes class;/* sensor class */
const char *label; /* sensor label */
- bool paged; /* true if paged sensor */
- bool update; /* true if update needed */
- bool compare; /* true if compare function needed */
+ bool paged:1; /* true if paged sensor */
+ bool update:1; /* true if update needed */
+ bool compare:1; /* true if compare function needed */
u32 func; /* sensor mask */
u32 sfunc; /* sensor status mask */
int sreg; /* status register */
@@ -1544,7 +1559,7 @@ static int pmbus_add_limit_attrs(struct i2c_client *client,
curr = pmbus_add_sensor(data, name, l->attr, index,
page, 0xff, l->reg, attr->class,
attr->update || l->update,
- false, true);
+ l->readonly, l->writeonly, true);
if (!curr)
return -ENOMEM;
if (l->sbit && (info->func[page] & attr->sfunc)) {
@@ -1584,7 +1599,7 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client,
return ret;
}
base = pmbus_add_sensor(data, name, "input", index, page, phase,
- attr->reg, attr->class, true, true, true);
+ attr->reg, attr->class, true, true, false, true);
if (!base)
return -ENOMEM;
/* No limit and alarm attributes for phase specific sensors */
@@ -1707,23 +1722,29 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = {
}, {
.reg = PMBUS_VIRT_READ_VIN_AVG,
.update = true,
+ .readonly = true,
.attr = "average",
}, {
.reg = PMBUS_VIRT_READ_VIN_MIN,
.update = true,
+ .readonly = true,
.attr = "lowest",
}, {
.reg = PMBUS_VIRT_READ_VIN_MAX,
.update = true,
+ .readonly = true,
.attr = "highest",
}, {
.reg = PMBUS_VIRT_RESET_VIN_HISTORY,
+ .writeonly = true,
.attr = "reset_history",
}, {
.reg = PMBUS_MFR_VIN_MIN,
+ .readonly = true,
.attr = "rated_min",
}, {
.reg = PMBUS_MFR_VIN_MAX,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -1776,23 +1797,29 @@ static const struct pmbus_limit_attr vout_limit_attrs[] = {
}, {
.reg = PMBUS_VIRT_READ_VOUT_AVG,
.update = true,
+ .readonly = true,
.attr = "average",
}, {
.reg = PMBUS_VIRT_READ_VOUT_MIN,
.update = true,
+ .readonly = true,
.attr = "lowest",
}, {
.reg = PMBUS_VIRT_READ_VOUT_MAX,
.update = true,
+ .readonly = true,
.attr = "highest",
}, {
.reg = PMBUS_VIRT_RESET_VOUT_HISTORY,
+ .writeonly = true,
.attr = "reset_history",
}, {
.reg = PMBUS_MFR_VOUT_MIN,
+ .readonly = true,
.attr = "rated_min",
}, {
.reg = PMBUS_MFR_VOUT_MAX,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -1852,20 +1879,25 @@ static const struct pmbus_limit_attr iin_limit_attrs[] = {
}, {
.reg = PMBUS_VIRT_READ_IIN_AVG,
.update = true,
+ .readonly = true,
.attr = "average",
}, {
.reg = PMBUS_VIRT_READ_IIN_MIN,
.update = true,
+ .readonly = true,
.attr = "lowest",
}, {
.reg = PMBUS_VIRT_READ_IIN_MAX,
.update = true,
+ .readonly = true,
.attr = "highest",
}, {
.reg = PMBUS_VIRT_RESET_IIN_HISTORY,
+ .writeonly = true,
.attr = "reset_history",
}, {
.reg = PMBUS_MFR_IIN_MAX,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -1889,20 +1921,25 @@ static const struct pmbus_limit_attr iout_limit_attrs[] = {
}, {
.reg = PMBUS_VIRT_READ_IOUT_AVG,
.update = true,
+ .readonly = true,
.attr = "average",
}, {
.reg = PMBUS_VIRT_READ_IOUT_MIN,
.update = true,
+ .readonly = true,
.attr = "lowest",
}, {
.reg = PMBUS_VIRT_READ_IOUT_MAX,
.update = true,
+ .readonly = true,
.attr = "highest",
}, {
.reg = PMBUS_VIRT_RESET_IOUT_HISTORY,
+ .writeonly = true,
.attr = "reset_history",
}, {
.reg = PMBUS_MFR_IOUT_MAX,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -1943,20 +1980,25 @@ static const struct pmbus_limit_attr pin_limit_attrs[] = {
}, {
.reg = PMBUS_VIRT_READ_PIN_AVG,
.update = true,
+ .readonly = true,
.attr = "average",
}, {
.reg = PMBUS_VIRT_READ_PIN_MIN,
.update = true,
+ .readonly = true,
.attr = "input_lowest",
}, {
.reg = PMBUS_VIRT_READ_PIN_MAX,
.update = true,
+ .readonly = true,
.attr = "input_highest",
}, {
.reg = PMBUS_VIRT_RESET_PIN_HISTORY,
+ .writeonly = true,
.attr = "reset_history",
}, {
.reg = PMBUS_MFR_PIN_MAX,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -1980,20 +2022,25 @@ static const struct pmbus_limit_attr pout_limit_attrs[] = {
}, {
.reg = PMBUS_VIRT_READ_POUT_AVG,
.update = true,
+ .readonly = true,
.attr = "average",
}, {
.reg = PMBUS_VIRT_READ_POUT_MIN,
.update = true,
+ .readonly = true,
.attr = "input_lowest",
}, {
.reg = PMBUS_VIRT_READ_POUT_MAX,
.update = true,
+ .readonly = true,
.attr = "input_highest",
}, {
.reg = PMBUS_VIRT_RESET_POUT_HISTORY,
+ .writeonly = true,
.attr = "reset_history",
}, {
.reg = PMBUS_MFR_POUT_MAX,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -2049,18 +2096,23 @@ static const struct pmbus_limit_attr temp_limit_attrs[] = {
.sbit = PB_TEMP_OT_FAULT,
}, {
.reg = PMBUS_VIRT_READ_TEMP_MIN,
+ .readonly = true,
.attr = "lowest",
}, {
.reg = PMBUS_VIRT_READ_TEMP_AVG,
+ .readonly = true,
.attr = "average",
}, {
.reg = PMBUS_VIRT_READ_TEMP_MAX,
+ .readonly = true,
.attr = "highest",
}, {
.reg = PMBUS_VIRT_RESET_TEMP_HISTORY,
+ .writeonly = true,
.attr = "reset_history",
}, {
.reg = PMBUS_MFR_MAX_TEMP_1,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -2090,18 +2142,23 @@ static const struct pmbus_limit_attr temp_limit_attrs2[] = {
.sbit = PB_TEMP_OT_FAULT,
}, {
.reg = PMBUS_VIRT_READ_TEMP2_MIN,
+ .readonly = true,
.attr = "lowest",
}, {
.reg = PMBUS_VIRT_READ_TEMP2_AVG,
+ .readonly = true,
.attr = "average",
}, {
.reg = PMBUS_VIRT_READ_TEMP2_MAX,
+ .readonly = true,
.attr = "highest",
}, {
.reg = PMBUS_VIRT_RESET_TEMP2_HISTORY,
+ .writeonly = true,
.attr = "reset_history",
}, {
.reg = PMBUS_MFR_MAX_TEMP_2,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -2131,6 +2188,7 @@ static const struct pmbus_limit_attr temp_limit_attrs3[] = {
.sbit = PB_TEMP_OT_FAULT,
}, {
.reg = PMBUS_MFR_MAX_TEMP_3,
+ .readonly = true,
.attr = "rated_max",
},
};
@@ -2214,7 +2272,7 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client,
sensor = pmbus_add_sensor(data, "fan", "target", index, page,
0xff, PMBUS_VIRT_FAN_TARGET_1 + id, PSC_FAN,
- false, false, true);
+ false, false, false, true);
if (!sensor)
return -ENOMEM;
@@ -2225,14 +2283,14 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client,
sensor = pmbus_add_sensor(data, "pwm", NULL, index, page,
0xff, PMBUS_VIRT_PWM_1 + id, PSC_PWM,
- false, false, true);
+ false, false, false, true);
if (!sensor)
return -ENOMEM;
sensor = pmbus_add_sensor(data, "pwm", "enable", index, page,
0xff, PMBUS_VIRT_PWM_ENABLE_1 + id, PSC_PWM,
- true, false, false);
+ true, false, false, false);
if (!sensor)
return -ENOMEM;
@@ -2274,7 +2332,7 @@ static int pmbus_add_fan_attributes(struct i2c_client *client,
if (pmbus_add_sensor(data, "fan", "input", index,
page, 0xff, pmbus_fan_registers[f],
- PSC_FAN, true, true, true) == NULL)
+ PSC_FAN, true, true, false, true) == NULL)
return -ENOMEM;
/* Fan control */
@@ -3176,12 +3234,19 @@ static int pmbus_regulator_get_voltage(struct regulator_dev *rdev)
.class = PSC_VOLTAGE_OUT,
.convert = true,
};
+ int ret;
+ mutex_lock(&data->update_lock);
s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_READ_VOUT);
- if (s.data < 0)
- return s.data;
+ if (s.data < 0) {
+ ret = s.data;
+ goto unlock;
+ }
- return (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */
+ ret = (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */
+unlock:
+ mutex_unlock(&data->update_lock);
+ return ret;
}
static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv,
@@ -3198,16 +3263,22 @@ static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv,
};
int val = DIV_ROUND_CLOSEST(min_uv, 1000); /* convert to mV */
int low, high;
+ int ret;
*selector = 0;
+ mutex_lock(&data->update_lock);
low = pmbus_regulator_get_low_margin(client, s.page);
- if (low < 0)
- return low;
+ if (low < 0) {
+ ret = low;
+ goto unlock;
+ }
high = pmbus_regulator_get_high_margin(client, s.page);
- if (high < 0)
- return high;
+ if (high < 0) {
+ ret = high;
+ goto unlock;
+ }
/* Make sure we are within margins */
if (low > val)
@@ -3217,7 +3288,10 @@ static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv,
val = pmbus_data2reg(data, &s, val);
- return _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val);
+ ret = _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val);
+unlock:
+ mutex_unlock(&data->update_lock);
+ return ret;
}
static int pmbus_regulator_list_voltage(struct regulator_dev *rdev,
@@ -3227,6 +3301,7 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev,
struct i2c_client *client = to_i2c_client(dev->parent);
struct pmbus_data *data = i2c_get_clientdata(client);
int val, low, high;
+ int ret;
if (data->flags & PMBUS_VOUT_PROTECTED)
return 0;
@@ -3239,18 +3314,29 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev,
val = DIV_ROUND_CLOSEST(rdev->desc->min_uV +
(rdev->desc->uV_step * selector), 1000); /* convert to mV */
+ mutex_lock(&data->update_lock);
+
low = pmbus_regulator_get_low_margin(client, rdev_get_id(rdev));
- if (low < 0)
- return low;
+ if (low < 0) {
+ ret = low;
+ goto unlock;
+ }
high = pmbus_regulator_get_high_margin(client, rdev_get_id(rdev));
- if (high < 0)
- return high;
+ if (high < 0) {
+ ret = high;
+ goto unlock;
+ }
- if (val >= low && val <= high)
- return val * 1000; /* unit is uV */
+ if (val >= low && val <= high) {
+ ret = val * 1000; /* unit is uV */
+ goto unlock;
+ }
- return 0;
+ ret = 0;
+unlock:
+ mutex_unlock(&data->update_lock);
+ return ret;
}
const struct regulator_ops pmbus_regulator_ops = {
@@ -3281,12 +3367,42 @@ int pmbus_regulator_init_cb(struct regulator_dev *rdev,
}
EXPORT_SYMBOL_NS_GPL(pmbus_regulator_init_cb, "PMBUS");
+static void pmbus_regulator_notify_work_cancel(void *data)
+{
+ struct pmbus_data *pdata = data;
+
+ cancel_work_sync(&pdata->regulator_notify_work);
+}
+
+static void pmbus_regulator_notify_worker(struct work_struct *work)
+{
+ struct pmbus_data *data =
+ container_of(work, struct pmbus_data, regulator_notify_work);
+ int i, j;
+
+ for (i = 0; i < data->info->pages; i++) {
+ int event;
+
+ event = atomic_xchg(&data->regulator_events[i], 0);
+ if (!event)
+ continue;
+
+ for (j = 0; j < data->info->num_regulators; j++) {
+ if (i == rdev_get_id(data->rdevs[j])) {
+ regulator_notifier_call_chain(data->rdevs[j],
+ event, NULL);
+ break;
+ }
+ }
+ }
+}
+
static int pmbus_regulator_register(struct pmbus_data *data)
{
struct device *dev = data->dev;
const struct pmbus_driver_info *info = data->info;
const struct pmbus_platform_data *pdata = dev_get_platdata(dev);
- int i;
+ int i, ret;
data->rdevs = devm_kzalloc(dev, sizeof(struct regulator_dev *) * info->num_regulators,
GFP_KERNEL);
@@ -3310,19 +3426,19 @@ static int pmbus_regulator_register(struct pmbus_data *data)
info->reg_desc[i].name);
}
+ INIT_WORK(&data->regulator_notify_work, pmbus_regulator_notify_worker);
+
+ ret = devm_add_action_or_reset(dev, pmbus_regulator_notify_work_cancel, data);
+ if (ret)
+ return ret;
+
return 0;
}
static void pmbus_regulator_notify(struct pmbus_data *data, int page, int event)
{
- int j;
-
- for (j = 0; j < data->info->num_regulators; j++) {
- if (page == rdev_get_id(data->rdevs[j])) {
- regulator_notifier_call_chain(data->rdevs[j], event, NULL);
- break;
- }
- }
+ atomic_or(event, &data->regulator_events[page]);
+ schedule_work(&data->regulator_notify_work);
}
#else
static int pmbus_regulator_register(struct pmbus_data *data)
diff --git a/drivers/i2c/busses/i2c-designware-amdisp.c b/drivers/i2c/busses/i2c-designware-amdisp.c
index c48728ad9f6f..9f0ec0fae6f2 100644
--- a/drivers/i2c/busses/i2c-designware-amdisp.c
+++ b/drivers/i2c/busses/i2c-designware-amdisp.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/soc/amd/isp4_misc.h>
@@ -76,22 +77,20 @@ static int amd_isp_dw_i2c_plat_probe(struct platform_device *pdev)
device_enable_async_suspend(&pdev->dev);
- pm_runtime_enable(&pdev->dev);
- pm_runtime_get_sync(&pdev->dev);
-
+ dev_pm_genpd_resume(&pdev->dev);
ret = i2c_dw_probe(isp_i2c_dev);
if (ret) {
dev_err_probe(&pdev->dev, ret, "i2c_dw_probe failed\n");
goto error_release_rpm;
}
-
- pm_runtime_put_sync(&pdev->dev);
+ dev_pm_genpd_suspend(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
return 0;
error_release_rpm:
amd_isp_dw_i2c_plat_pm_cleanup(isp_i2c_dev);
- pm_runtime_put_sync(&pdev->dev);
return ret;
}
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 85f554044cf1..452d120a210b 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -1018,8 +1018,9 @@ static inline int i2c_imx_isr_read(struct imx_i2c_struct *i2c_imx)
return 0;
}
-static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx)
+static inline enum imx_i2c_state i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx)
{
+ enum imx_i2c_state next_state = IMX_I2C_STATE_READ_CONTINUE;
unsigned int temp;
if ((i2c_imx->msg->len - 1) == i2c_imx->msg_buf_idx) {
@@ -1033,18 +1034,20 @@ static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx)
i2c_imx->stopped = 1;
temp &= ~(I2CR_MSTA | I2CR_MTX);
imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
- } else {
- /*
- * For i2c master receiver repeat restart operation like:
- * read -> repeat MSTA -> read/write
- * The controller must set MTX before read the last byte in
- * the first read operation, otherwise the first read cost
- * one extra clock cycle.
- */
- temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
- temp |= I2CR_MTX;
- imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
+
+ return IMX_I2C_STATE_DONE;
}
+ /*
+ * For i2c master receiver repeat restart operation like:
+ * read -> repeat MSTA -> read/write
+ * The controller must set MTX before read the last byte in
+ * the first read operation, otherwise the first read cost
+ * one extra clock cycle.
+ */
+ temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
+ temp |= I2CR_MTX;
+ imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
+ next_state = IMX_I2C_STATE_DONE;
} else if (i2c_imx->msg_buf_idx == (i2c_imx->msg->len - 2)) {
temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
temp |= I2CR_TXAK;
@@ -1052,6 +1055,7 @@ static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx)
}
i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
+ return next_state;
}
static inline void i2c_imx_isr_read_block_data_len(struct imx_i2c_struct *i2c_imx)
@@ -1088,11 +1092,9 @@ static irqreturn_t i2c_imx_master_isr(struct imx_i2c_struct *i2c_imx, unsigned i
break;
case IMX_I2C_STATE_READ_CONTINUE:
- i2c_imx_isr_read_continue(i2c_imx);
- if (i2c_imx->msg_buf_idx == i2c_imx->msg->len) {
- i2c_imx->state = IMX_I2C_STATE_DONE;
+ i2c_imx->state = i2c_imx_isr_read_continue(i2c_imx);
+ if (i2c_imx->state == IMX_I2C_STATE_DONE)
wake_up(&i2c_imx->queue);
- }
break;
case IMX_I2C_STATE_READ_BLOCK_DATA:
@@ -1490,6 +1492,7 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs,
bool is_lastmsg)
{
int block_data = msgs->flags & I2C_M_RECV_LEN;
+ int ret = 0;
dev_dbg(&i2c_imx->adapter.dev,
"<%s> write slave address: addr=0x%x\n",
@@ -1522,10 +1525,20 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs,
dev_err(&i2c_imx->adapter.dev, "<%s> read timedout\n", __func__);
return -ETIMEDOUT;
}
- if (!i2c_imx->stopped)
- return i2c_imx_bus_busy(i2c_imx, 0, false);
+ if (i2c_imx->is_lastmsg) {
+ if (!i2c_imx->stopped)
+ ret = i2c_imx_bus_busy(i2c_imx, 0, false);
+ /*
+ * Only read the last byte of the last message after the bus is
+ * not busy. Else the controller generates another clock which
+ * might confuse devices.
+ */
+ if (!ret)
+ i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = imx_i2c_read_reg(i2c_imx,
+ IMX_I2C_I2DR);
+ }
- return 0;
+ return ret;
}
static int i2c_imx_xfer_common(struct i2c_adapter *adapter,
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index fc45c384833f..4fafe393a48c 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -608,14 +608,29 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
sg_offset, remote_addr, rkey, dir);
- } else if (sg_cnt > 1) {
+ /*
+ * If MR init succeeded or failed for a reason other
+ * than pool exhaustion, that result is final.
+ *
+ * Pool exhaustion (-EAGAIN) from the max_sgl_rd
+ * optimization is recoverable: fall back to
+ * direct SGE posting. iWARP and force_mr require
+ * MRs unconditionally, so -EAGAIN is terminal.
+ */
+ if (ret != -EAGAIN ||
+ rdma_protocol_iwarp(qp->device, port_num) ||
+ unlikely(rdma_rw_force_mr))
+ goto out;
+ }
+
+ if (sg_cnt > 1)
ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
remote_addr, rkey, dir);
- } else {
+ else
ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
remote_addr, rkey, dir);
- }
+out:
if (ret < 0)
goto out_unmap_sg;
return ret;
@@ -686,14 +701,16 @@ int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
return ret;
/*
- * IOVA mapping not available. Check if MR registration provides
- * better performance than multiple SGE entries.
+ * IOVA not available; fall back to the map_wrs path, which maps
+ * each bvec as a direct SGE. This is always correct: the MR path
+ * is a throughput optimization, not a correctness requirement.
+ * (iWARP, which does require MRs, is handled by the check above.)
+ *
+ * The rdma_rw_io_needs_mr() gate is not used here because nr_bvec
+ * is a raw page count that overstates DMA entry demand -- the bvec
+ * caller has no post-DMA-coalescing segment count, and feeding the
+ * inflated count into the MR path exhausts the pool on RDMA READs.
*/
- if (rdma_rw_io_needs_mr(dev, port_num, dir, nr_bvec))
- return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs,
- nr_bvec, &iter, remote_addr,
- rkey, dir);
-
return rdma_rw_init_map_wrs_bvec(ctx, qp, bvecs, nr_bvec, &iter,
remote_addr, rkey, dir);
}
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index cff4fcca2c34..edc34c69f0f2 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -55,7 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
if (dirty)
ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
- DMA_BIDIRECTIONAL, 0);
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_REQUIRE_COHERENT);
for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
unpin_user_page_range_dirty_lock(sg_page(sg),
@@ -169,7 +170,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base;
- unsigned long dma_attr = 0;
+ unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT;
struct mm_struct *mm;
unsigned long npages;
int pinned, ret;
diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c
index d34b5f88cd40..71a7ca2196ad 100644
--- a/drivers/infiniband/hw/bng_re/bng_dev.c
+++ b/drivers/infiniband/hw/bng_re/bng_dev.c
@@ -210,7 +210,7 @@ static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev)
return rc;
}
-static void bng_re_query_hwrm_version(struct bng_re_dev *rdev)
+static int bng_re_query_hwrm_version(struct bng_re_dev *rdev)
{
struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
struct hwrm_ver_get_output ver_get_resp = {};
@@ -230,7 +230,7 @@ static void bng_re_query_hwrm_version(struct bng_re_dev *rdev)
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
rc);
- return;
+ return rc;
}
cctx = rdev->chip_ctx;
@@ -244,6 +244,8 @@ static void bng_re_query_hwrm_version(struct bng_re_dev *rdev)
if (!cctx->hwrm_cmd_max_timeout)
cctx->hwrm_cmd_max_timeout = BNG_ROCE_FW_MAX_TIMEOUT;
+
+ return 0;
}
static void bng_re_dev_uninit(struct bng_re_dev *rdev)
@@ -306,13 +308,15 @@ static int bng_re_dev_init(struct bng_re_dev *rdev)
goto msix_ctx_fail;
}
- bng_re_query_hwrm_version(rdev);
+ rc = bng_re_query_hwrm_version(rdev);
+ if (rc)
+ goto destroy_chip_ctx;
rc = bng_re_alloc_fw_channel(&rdev->bng_res, &rdev->rcfw);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate RCFW Channel: %#x\n", rc);
- goto alloc_fw_chl_fail;
+ goto destroy_chip_ctx;
}
/* Allocate nq record memory */
@@ -391,7 +395,7 @@ free_rcfw:
kfree(rdev->nqr);
nq_alloc_fail:
bng_re_free_rcfw_channel(&rdev->rcfw);
-alloc_fw_chl_fail:
+destroy_chip_ctx:
bng_re_destroy_chip_ctx(rdev);
msix_ctx_fail:
bnge_unregister_dev(rdev->aux_dev);
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index 229b0ad3b0cb..e97b5f0d7003 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/log2.h>
@@ -310,23 +310,19 @@ static inline struct efa_comp_ctx *efa_com_get_comp_ctx_by_cmd_id(struct efa_com
return &aq->comp_ctx[ctx_id];
}
-static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
- struct efa_admin_aq_entry *cmd,
- size_t cmd_size_in_bytes,
- struct efa_admin_acq_entry *comp,
- size_t comp_size_in_bytes)
+static void __efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
+ struct efa_comp_ctx *comp_ctx,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size_in_bytes,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size_in_bytes)
{
struct efa_admin_aq_entry *aqe;
- struct efa_comp_ctx *comp_ctx;
u16 queue_size_mask;
u16 cmd_id;
u16 ctx_id;
u16 pi;
- comp_ctx = efa_com_alloc_comp_ctx(aq);
- if (!comp_ctx)
- return ERR_PTR(-EINVAL);
-
queue_size_mask = aq->depth - 1;
pi = aq->sq.pc & queue_size_mask;
ctx_id = efa_com_get_comp_ctx_id(aq, comp_ctx);
@@ -360,8 +356,6 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
/* barrier not needed in case of writel */
writel(aq->sq.pc, aq->sq.db_addr);
-
- return comp_ctx;
}
static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
@@ -394,28 +388,25 @@ static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
return 0;
}
-static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
- struct efa_admin_aq_entry *cmd,
- size_t cmd_size_in_bytes,
- struct efa_admin_acq_entry *comp,
- size_t comp_size_in_bytes)
+static int efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
+ struct efa_comp_ctx *comp_ctx,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size_in_bytes,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size_in_bytes)
{
- struct efa_comp_ctx *comp_ctx;
-
spin_lock(&aq->sq.lock);
if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
spin_unlock(&aq->sq.lock);
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
}
- comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
- comp_size_in_bytes);
+ __efa_com_submit_admin_cmd(aq, comp_ctx, cmd, cmd_size_in_bytes, comp,
+ comp_size_in_bytes);
spin_unlock(&aq->sq.lock);
- if (IS_ERR(comp_ctx))
- clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
- return comp_ctx;
+ return 0;
}
static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
@@ -512,7 +503,6 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
{
unsigned long timeout;
unsigned long flags;
- int err;
timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
@@ -532,24 +522,20 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
atomic64_inc(&aq->stats.no_completion);
clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
- err = -ETIME;
- goto out;
+ return -ETIME;
}
msleep(aq->poll_interval);
}
- err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
-out:
- efa_com_dealloc_comp_ctx(aq, comp_ctx);
- return err;
+ return efa_com_comp_status_to_errno(
+ comp_ctx->user_cqe->acq_common_descriptor.status);
}
static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
struct efa_com_admin_queue *aq)
{
unsigned long flags;
- int err;
wait_for_completion_timeout(&comp_ctx->wait_event,
usecs_to_jiffies(aq->completion_timeout));
@@ -585,14 +571,11 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com
aq->cq.cc);
clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
- err = -ETIME;
- goto out;
+ return -ETIME;
}
- err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
-out:
- efa_com_dealloc_comp_ctx(aq, comp_ctx);
- return err;
+ return efa_com_comp_status_to_errno(
+ comp_ctx->user_cqe->acq_common_descriptor.status);
}
/*
@@ -642,30 +625,39 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
cmd->aq_common_descriptor.opcode);
- comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
- if (IS_ERR(comp_ctx)) {
+
+ comp_ctx = efa_com_alloc_comp_ctx(aq);
+ if (!comp_ctx) {
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+ up(&aq->avail_cmds);
+ return -EINVAL;
+ }
+
+ err = efa_com_submit_admin_cmd(aq, comp_ctx, cmd, cmd_size, comp, comp_size);
+ if (err) {
ibdev_err_ratelimited(
aq->efa_dev,
- "Failed to submit command %s (opcode %u) err %pe\n",
+ "Failed to submit command %s (opcode %u) err %d\n",
efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
- cmd->aq_common_descriptor.opcode, comp_ctx);
+ cmd->aq_common_descriptor.opcode, err);
+ efa_com_dealloc_comp_ctx(aq, comp_ctx);
up(&aq->avail_cmds);
atomic64_inc(&aq->stats.cmd_err);
- return PTR_ERR(comp_ctx);
+ return err;
}
err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
if (err) {
ibdev_err_ratelimited(
aq->efa_dev,
- "Failed to process command %s (opcode %u) comp_status %d err %d\n",
+ "Failed to process command %s (opcode %u) err %d\n",
efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
- cmd->aq_common_descriptor.opcode,
- comp_ctx->user_cqe->acq_common_descriptor.status, err);
+ cmd->aq_common_descriptor.opcode, err);
atomic64_inc(&aq->stats.cmd_err);
}
+ efa_com_dealloc_comp_ctx(aq, comp_ctx);
up(&aq->avail_cmds);
return err;
diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c
index 4842931f5316..a5671da3db64 100644
--- a/drivers/infiniband/hw/ionic/ionic_controlpath.c
+++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c
@@ -508,6 +508,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev,
{
const struct ib_global_route *grh;
enum rdma_network_type net;
+ u8 smac[ETH_ALEN];
u16 vlan;
int rc;
@@ -518,7 +519,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev,
grh = rdma_ah_read_grh(attr);
- rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, &hdr->eth.smac_h[0]);
+ rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, smac);
if (rc)
return rc;
@@ -536,6 +537,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev,
if (rc)
return rc;
+ ether_addr_copy(hdr->eth.smac_h, smac);
ether_addr_copy(hdr->eth.dmac_h, attr->roce.dmac);
if (net == RDMA_NETWORK_IPV4) {
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 3d084d4ff577..91c0e7298283 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -2241,11 +2241,12 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
int oldarpindex;
int arpindex;
struct net_device *netdev = iwdev->netdev;
+ int ret;
/* create an hte and cm_node for this instance */
cm_node = kzalloc_obj(*cm_node, GFP_ATOMIC);
if (!cm_node)
- return NULL;
+ return ERR_PTR(-ENOMEM);
/* set our node specific transport info */
cm_node->ipv4 = cm_info->ipv4;
@@ -2348,8 +2349,10 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
arpindex = -EINVAL;
}
- if (arpindex < 0)
+ if (arpindex < 0) {
+ ret = -EINVAL;
goto err;
+ }
ether_addr_copy(cm_node->rem_mac,
iwdev->rf->arp_table[arpindex].mac_addr);
@@ -2360,7 +2363,7 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
err:
kfree(cm_node);
- return NULL;
+ return ERR_PTR(ret);
}
static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
@@ -3021,8 +3024,8 @@ static int irdma_create_cm_node(struct irdma_cm_core *cm_core,
/* create a CM connection node */
cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL);
- if (!cm_node)
- return -ENOMEM;
+ if (IS_ERR(cm_node))
+ return PTR_ERR(cm_node);
/* set our node side to client (active) side */
cm_node->tcp_cntxt.client = 1;
@@ -3219,9 +3222,9 @@ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf)
cm_info.cm_id = listener->cm_id;
cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info,
listener);
- if (!cm_node) {
+ if (IS_ERR(cm_node)) {
ibdev_dbg(&cm_core->iwdev->ibdev,
- "CM: allocate node failed\n");
+ "CM: allocate node failed ret=%ld\n", PTR_ERR(cm_node));
refcount_dec(&listener->refcnt);
return;
}
@@ -4239,21 +4242,21 @@ static void irdma_cm_event_handler(struct work_struct *work)
irdma_cm_event_reset(event);
break;
case IRDMA_CM_EVENT_CONNECTED:
- if (!event->cm_node->cm_id ||
- event->cm_node->state != IRDMA_CM_STATE_OFFLOADED)
+ if (!cm_node->cm_id ||
+ cm_node->state != IRDMA_CM_STATE_OFFLOADED)
break;
irdma_cm_event_connected(event);
break;
case IRDMA_CM_EVENT_MPA_REJECT:
- if (!event->cm_node->cm_id ||
+ if (!cm_node->cm_id ||
cm_node->state == IRDMA_CM_STATE_OFFLOADED)
break;
irdma_send_cm_event(cm_node, cm_node->cm_id,
IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
break;
case IRDMA_CM_EVENT_ABORTED:
- if (!event->cm_node->cm_id ||
- event->cm_node->state == IRDMA_CM_STATE_OFFLOADED)
+ if (!cm_node->cm_id ||
+ cm_node->state == IRDMA_CM_STATE_OFFLOADED)
break;
irdma_event_connect_error(event);
break;
@@ -4263,7 +4266,7 @@ static void irdma_cm_event_handler(struct work_struct *work)
break;
}
- irdma_rem_ref_cm_node(event->cm_node);
+ irdma_rem_ref_cm_node(cm_node);
kfree(event);
}
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index ac3721a5747a..4718acf6c6fd 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -1438,7 +1438,7 @@ exit:
* irdma_round_up_wq - return round up qp wq depth
* @wqdepth: wq depth in quanta to round up
*/
-static int irdma_round_up_wq(u32 wqdepth)
+static u64 irdma_round_up_wq(u64 wqdepth)
{
int scount = 1;
@@ -1491,15 +1491,16 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge,
int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
u32 *sqdepth)
{
- u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
+ u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift;
+ u64 hw_quanta =
+ irdma_round_up_wq(((u64)sq_size << shift) + IRDMA_SQ_RSVD);
- *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD);
-
- if (*sqdepth < min_size)
- *sqdepth = min_size;
- else if (*sqdepth > uk_attrs->max_hw_wq_quanta)
+ if (hw_quanta < min_hw_quanta)
+ hw_quanta = min_hw_quanta;
+ else if (hw_quanta > uk_attrs->max_hw_wq_quanta)
return -EINVAL;
+ *sqdepth = hw_quanta;
return 0;
}
@@ -1513,15 +1514,16 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
u32 *rqdepth)
{
- u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
-
- *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD);
+ u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift;
+ u64 hw_quanta =
+ irdma_round_up_wq(((u64)rq_size << shift) + IRDMA_RQ_RSVD);
- if (*rqdepth < min_size)
- *rqdepth = min_size;
- else if (*rqdepth > uk_attrs->max_hw_rq_quanta)
+ if (hw_quanta < min_hw_quanta)
+ hw_quanta = min_hw_quanta;
+ else if (hw_quanta > uk_attrs->max_hw_rq_quanta)
return -EINVAL;
+ *rqdepth = hw_quanta;
return 0;
}
@@ -1535,13 +1537,16 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift,
u32 *srqdepth)
{
- *srqdepth = irdma_round_up_wq((srq_size << shift) + IRDMA_RQ_RSVD);
+ u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift;
+ u64 hw_quanta =
+ irdma_round_up_wq(((u64)srq_size << shift) + IRDMA_RQ_RSVD);
- if (*srqdepth < ((u32)uk_attrs->min_hw_wq_size << shift))
- *srqdepth = uk_attrs->min_hw_wq_size << shift;
- else if (*srqdepth > uk_attrs->max_hw_srq_quanta)
+ if (hw_quanta < min_hw_quanta)
+ hw_quanta = min_hw_quanta;
+ else if (hw_quanta > uk_attrs->max_hw_srq_quanta)
return -EINVAL;
+ *srqdepth = hw_quanta;
return 0;
}
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index ab8c5284d4be..495e5daff4b4 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -2322,8 +2322,6 @@ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp)
struct irdma_qp *qp = sc_qp->qp_uk.back_qp;
struct ib_qp_attr attr;
- if (qp->iwdev->rf->reset)
- return;
attr.qp_state = IB_QPS_ERR;
if (rdma_protocol_roce(qp->ibqp.device, 1))
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 7251cd7a2147..95f590c10c05 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -558,7 +558,8 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
}
irdma_qp_rem_ref(&iwqp->ibqp);
- wait_for_completion(&iwqp->free_qp);
+ if (!iwdev->rf->reset)
+ wait_for_completion(&iwqp->free_qp);
irdma_free_lsmm_rsrc(iwqp);
irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
@@ -1105,6 +1106,7 @@ static int irdma_create_qp(struct ib_qp *ibqp,
spin_lock_init(&iwqp->sc_qp.pfpdu.lock);
iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
rf->qp_table[qp_num] = iwqp;
+ init_completion(&iwqp->free_qp);
if (udata) {
/* GEN_1 legacy support with libi40iw does not have expanded uresp struct */
@@ -1129,7 +1131,6 @@ static int irdma_create_qp(struct ib_qp *ibqp,
}
}
- init_completion(&iwqp->free_qp);
return 0;
error:
@@ -1462,8 +1463,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ctx_info->remote_atomics_en = true;
}
- wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend));
-
ibdev_dbg(&iwdev->ibdev,
"VERBS: caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n",
__builtin_return_address(0), ibqp->qp_num, attr->qp_state,
@@ -1540,6 +1539,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
case IB_QPS_ERR:
case IB_QPS_RESET:
if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
+ iwqp->ibqp_state = attr->qp_state;
spin_unlock_irqrestore(&iwqp->lock, flags);
if (udata && udata->inlen) {
if (ib_copy_from_udata(&ureq, udata,
@@ -1745,6 +1745,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
case IB_QPS_ERR:
case IB_QPS_RESET:
if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
+ iwqp->ibqp_state = attr->qp_state;
spin_unlock_irqrestore(&iwqp->lock, flags);
if (udata && udata->inlen) {
if (ib_copy_from_udata(&ureq, udata,
@@ -3723,6 +3724,7 @@ static int irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len,
err:
ib_umem_release(region);
+ iwmr->region = NULL;
return err;
}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5dac64be61bb..94d514169642 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1211,7 +1211,7 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
*/
if (dev_use_swiotlb(dev, size, dir) &&
iova_unaligned(iovad, phys, size)) {
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
return DMA_MAPPING_ERROR;
phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs);
@@ -1223,7 +1223,8 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
arch_sync_dma_for_device(phys, size, dir);
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
- if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
+ if (iova == DMA_MAPPING_ERROR &&
+ !(attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
}
@@ -1233,7 +1234,7 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
{
phys_addr_t phys;
- if (attrs & DMA_ATTR_MMIO) {
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) {
__iommu_dma_unmap(dev, dma_handle, size);
return;
}
@@ -1945,9 +1946,21 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
if (WARN_ON_ONCE(iova_start_pad && offset > 0))
return -EIO;
+ /*
+ * DMA_IOVA_USE_SWIOTLB is set on state after some entry
+ * took SWIOTLB path, which we were supposed to prevent
+ * for DMA_ATTR_REQUIRE_COHERENT attribute.
+ */
+ if (WARN_ON_ONCE((state->__size & DMA_IOVA_USE_SWIOTLB) &&
+ (attrs & DMA_ATTR_REQUIRE_COHERENT)))
+ return -EOPNOTSUPP;
+
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return -EOPNOTSUPP;
+
if (dev_use_swiotlb(dev, size, dir) &&
iova_unaligned(iovad, phys, size)) {
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
return -EPERM;
return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
diff --git a/drivers/iommu/generic_pt/fmt/amdv1.h b/drivers/iommu/generic_pt/fmt/amdv1.h
index 3b2c41d9654d..8d11b08291d7 100644
--- a/drivers/iommu/generic_pt/fmt/amdv1.h
+++ b/drivers/iommu/generic_pt/fmt/amdv1.h
@@ -191,7 +191,7 @@ static inline enum pt_entry_type amdv1pt_load_entry_raw(struct pt_state *pts)
}
#define pt_load_entry_raw amdv1pt_load_entry_raw
-static inline void
+static __always_inline void
amdv1pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
unsigned int oasz_lg2,
const struct pt_write_attrs *attrs)
diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 3e33fe64feab..7e7a6e7abdee 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -1057,7 +1057,7 @@ size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova,
pt_walk_range(&range, __unmap_range, &unmap);
- gather_range_pages(iotlb_gather, iommu_table, iova, len,
+ gather_range_pages(iotlb_gather, iommu_table, iova, unmap.unmapped,
&unmap.free_list);
return unmap.unmapped;
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c
index 83f31ea657b7..181320528a47 100644
--- a/drivers/irqchip/irq-qcom-mpm.c
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -306,6 +306,8 @@ static int mpm_pd_power_off(struct generic_pm_domain *genpd)
if (ret < 0)
return ret;
+ mbox_client_txdone(priv->mbox_chan, 0);
+
return 0;
}
@@ -434,6 +436,7 @@ static int qcom_mpm_probe(struct platform_device *pdev, struct device_node *pare
}
priv->mbox_client.dev = dev;
+ priv->mbox_client.knows_txdone = true;
priv->mbox_chan = mbox_request_channel(&priv->mbox_client, 0);
if (IS_ERR(priv->mbox_chan)) {
ret = PTR_ERR(priv->mbox_chan);
diff --git a/drivers/irqchip/irq-renesas-rzv2h.c b/drivers/irqchip/irq-renesas-rzv2h.c
index da2bc43a0e12..03e93b061edd 100644
--- a/drivers/irqchip/irq-renesas-rzv2h.c
+++ b/drivers/irqchip/irq-renesas-rzv2h.c
@@ -621,7 +621,7 @@ static int rzv2h_icu_probe_common(struct platform_device *pdev, struct device_no
return 0;
pm_put:
- pm_runtime_put(&pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
return ret;
}
diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
index aa4dd7e7cf5a..8e25f970fd12 100644
--- a/drivers/media/i2c/ccs/ccs-core.c
+++ b/drivers/media/i2c/ccs/ccs-core.c
@@ -3080,8 +3080,6 @@ static int ccs_init_state(struct v4l2_subdev *sd,
struct v4l2_rect *crop =
v4l2_subdev_state_get_crop(sd_state, pad);
- guard(mutex)(&sensor->mutex);
-
ccs_get_native_size(ssd, crop);
fmt->width = crop->width;
diff --git a/drivers/media/mc/mc-request.c b/drivers/media/mc/mc-request.c
index c7c7d4a86c6b..13e77648807c 100644
--- a/drivers/media/mc/mc-request.c
+++ b/drivers/media/mc/mc-request.c
@@ -192,6 +192,8 @@ static long media_request_ioctl_reinit(struct media_request *req)
struct media_device *mdev = req->mdev;
unsigned long flags;
+ mutex_lock(&mdev->req_queue_mutex);
+
spin_lock_irqsave(&req->lock, flags);
if (req->state != MEDIA_REQUEST_STATE_IDLE &&
req->state != MEDIA_REQUEST_STATE_COMPLETE) {
@@ -199,6 +201,7 @@ static long media_request_ioctl_reinit(struct media_request *req)
"request: %s not in idle or complete state, cannot reinit\n",
req->debug_str);
spin_unlock_irqrestore(&req->lock, flags);
+ mutex_unlock(&mdev->req_queue_mutex);
return -EBUSY;
}
if (req->access_count) {
@@ -206,6 +209,7 @@ static long media_request_ioctl_reinit(struct media_request *req)
"request: %s is being accessed, cannot reinit\n",
req->debug_str);
spin_unlock_irqrestore(&req->lock, flags);
+ mutex_unlock(&mdev->req_queue_mutex);
return -EBUSY;
}
req->state = MEDIA_REQUEST_STATE_CLEANING;
@@ -216,6 +220,7 @@ static long media_request_ioctl_reinit(struct media_request *req)
spin_lock_irqsave(&req->lock, flags);
req->state = MEDIA_REQUEST_STATE_IDLE;
spin_unlock_irqrestore(&req->lock, flags);
+ mutex_unlock(&mdev->req_queue_mutex);
return 0;
}
diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c
index 28267ee30190..3119f3bc9f98 100644
--- a/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c
+++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c
@@ -500,11 +500,15 @@ void rkvdec_hevc_run_preamble(struct rkvdec_ctx *ctx,
ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS);
run->ext_sps_st_rps = ctrl ? ctrl->p_cur.p : NULL;
+ } else {
+ run->ext_sps_st_rps = NULL;
}
if (ctx->has_sps_lt_rps) {
ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS);
run->ext_sps_lt_rps = ctrl ? ctrl->p_cur.p : NULL;
+ } else {
+ run->ext_sps_lt_rps = NULL;
}
rkvdec_run_preamble(ctx, &run->base);
diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
index 97f1efde2e47..fb4f849d7366 100644
--- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
+++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
@@ -130,7 +130,7 @@ struct rkvdec_h264_ctx {
struct vdpu383_regs_h26x regs;
};
-static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb)
+static noinline_for_stack void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb)
{
pps->top_field_order_cnt0 = dpb[0].top_field_order_cnt;
pps->bot_field_order_cnt0 = dpb[0].bottom_field_order_cnt;
@@ -166,6 +166,31 @@ static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_d
pps->bot_field_order_cnt15 = dpb[15].bottom_field_order_cnt;
}
+static noinline_for_stack void set_dec_params(struct rkvdec_pps *pps, const struct v4l2_ctrl_h264_decode_params *dec_params)
+{
+ const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb;
+
+ for (int i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
+ if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
+ pps->is_longterm |= (1 << i);
+ pps->ref_field_flags |=
+ (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i;
+ pps->ref_colmv_use_flag |=
+ (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i;
+ pps->ref_topfield_used |=
+ (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i;
+ pps->ref_botfield_used |=
+ (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i;
+ }
+ pps->pic_field_flag =
+ !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC);
+ pps->pic_associated_flag =
+ !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD);
+
+ pps->cur_top_field = dec_params->top_field_order_cnt;
+ pps->cur_bot_field = dec_params->bottom_field_order_cnt;
+}
+
static void assemble_hw_pps(struct rkvdec_ctx *ctx,
struct rkvdec_h264_run *run)
{
@@ -177,7 +202,6 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx,
struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu;
struct rkvdec_sps_pps *hw_ps;
u32 pic_width, pic_height;
- u32 i;
/*
* HW read the SPS/PPS information from PPS packet index by PPS id.
@@ -261,28 +285,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx,
!!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT);
set_field_order_cnt(&hw_ps->pps, dpb);
+ set_dec_params(&hw_ps->pps, dec_params);
- for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
- if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
- hw_ps->pps.is_longterm |= (1 << i);
-
- hw_ps->pps.ref_field_flags |=
- (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i;
- hw_ps->pps.ref_colmv_use_flag |=
- (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i;
- hw_ps->pps.ref_topfield_used |=
- (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i;
- hw_ps->pps.ref_botfield_used |=
- (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i;
- }
-
- hw_ps->pps.pic_field_flag =
- !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC);
- hw_ps->pps.pic_associated_flag =
- !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD);
-
- hw_ps->pps.cur_top_field = dec_params->top_field_order_cnt;
- hw_ps->pps.cur_bot_field = dec_params->bottom_field_order_cnt;
}
static void rkvdec_write_regs(struct rkvdec_ctx *ctx)
diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c
index e4cdd2122873..2751f5396ee8 100644
--- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c
+++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c
@@ -893,7 +893,8 @@ out_update_last:
update_ctx_last_info(vp9_ctx);
}
-static void rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx)
+static noinline_for_stack void
+rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx)
{
struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
struct rkvdec_vp9_intra_frame_symbol_counts *intra_cnts = vp9_ctx->count_tbl.cpu;
diff --git a/drivers/media/platform/synopsys/Kconfig b/drivers/media/platform/synopsys/Kconfig
index e798ec00b189..bf2ac092fbb3 100644
--- a/drivers/media/platform/synopsys/Kconfig
+++ b/drivers/media/platform/synopsys/Kconfig
@@ -7,6 +7,7 @@ config VIDEO_DW_MIPI_CSI2RX
depends on VIDEO_DEV
depends on V4L_PLATFORM_DRIVERS
depends on PM && COMMON_CLK
+ select GENERIC_PHY_MIPI_DPHY
select MEDIA_CONTROLLER
select V4L2_FWNODE
select VIDEO_V4L2_SUBDEV_API
diff --git a/drivers/media/platform/synopsys/dw-mipi-csi2rx.c b/drivers/media/platform/synopsys/dw-mipi-csi2rx.c
index 170346ae1a59..4d96171a650b 100644
--- a/drivers/media/platform/synopsys/dw-mipi-csi2rx.c
+++ b/drivers/media/platform/synopsys/dw-mipi-csi2rx.c
@@ -301,7 +301,7 @@ dw_mipi_csi2rx_enum_mbus_code(struct v4l2_subdev *sd,
return 0;
case DW_MIPI_CSI2RX_PAD_SINK:
- if (code->index > csi2->formats_num)
+ if (code->index >= csi2->formats_num)
return -EINVAL;
code->code = csi2->formats[code->index].code;
diff --git a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
index 6f8e43b7f157..fa4224de4b99 100644
--- a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
+++ b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
@@ -343,7 +343,7 @@ const struct hantro_variant imx8mq_vpu_variant = {
.num_regs = ARRAY_SIZE(imx8mq_reg_names)
};
-static const struct of_device_id imx8mq_vpu_shared_resources[] __initconst = {
+static const struct of_device_id imx8mq_vpu_shared_resources[] = {
{ .compatible = "nxp,imx8mq-vpu-g1", },
{ .compatible = "nxp,imx8mq-vpu-g2", },
{ /* sentinel */ }
diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index 40c76c051da2..f6c8e3223796 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -1751,7 +1751,8 @@ static void uvc_video_complete(struct urb *urb)
/*
* Free transfer buffers.
*/
-static void uvc_free_urb_buffers(struct uvc_streaming *stream)
+static void uvc_free_urb_buffers(struct uvc_streaming *stream,
+ unsigned int size)
{
struct usb_device *udev = stream->dev->udev;
struct uvc_urb *uvc_urb;
@@ -1760,7 +1761,7 @@ static void uvc_free_urb_buffers(struct uvc_streaming *stream)
if (!uvc_urb->buffer)
continue;
- usb_free_noncoherent(udev, stream->urb_size, uvc_urb->buffer,
+ usb_free_noncoherent(udev, size, uvc_urb->buffer,
uvc_stream_dir(stream), uvc_urb->sgt);
uvc_urb->buffer = NULL;
uvc_urb->sgt = NULL;
@@ -1820,7 +1821,7 @@ static int uvc_alloc_urb_buffers(struct uvc_streaming *stream,
if (!uvc_alloc_urb_buffer(stream, uvc_urb, urb_size,
gfp_flags)) {
- uvc_free_urb_buffers(stream);
+ uvc_free_urb_buffers(stream, urb_size);
break;
}
@@ -1868,7 +1869,7 @@ static void uvc_video_stop_transfer(struct uvc_streaming *stream,
}
if (free_buffers)
- uvc_free_urb_buffers(stream);
+ uvc_free_urb_buffers(stream, stream->urb_size);
}
/*
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 37d33d4a363d..a2b650f4ec3c 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -3082,13 +3082,14 @@ static long __video_do_ioctl(struct file *file,
}
/*
- * We need to serialize streamon/off with queueing new requests.
+ * We need to serialize streamon/off/reqbufs with queueing new requests.
* These ioctls may trigger the cancellation of a streaming
* operation, and that should not be mixed with queueing a new
* request at the same time.
*/
if (v4l2_device_supports_requests(vfd->v4l2_dev) &&
- (cmd == VIDIOC_STREAMON || cmd == VIDIOC_STREAMOFF)) {
+ (cmd == VIDIOC_STREAMON || cmd == VIDIOC_STREAMOFF ||
+ cmd == VIDIOC_REQBUFS)) {
req_queue_lock = &vfd->v4l2_dev->mdev->req_queue_mutex;
if (mutex_lock_interruptible(req_queue_lock))
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 33f414d03ab9..a5484d11553d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5326,7 +5326,7 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
continue;
- if (bond_is_last_slave(bond, slave)) {
+ if (i + 1 == slaves_count) {
skb2 = skb;
skb_used = true;
} else {
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index 0498198a4696..766d455950f5 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -601,7 +601,9 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
/* We need synchronization with dev->stop() */
ASSERT_RTNL();
- can_ctrlmode_changelink(dev, data, extack);
+ err = can_ctrlmode_changelink(dev, data, extack);
+ if (err)
+ return err;
if (data[IFLA_CAN_BITTIMING]) {
struct can_bittiming bt;
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index bb7782582f40..0d0190ae094a 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -1225,7 +1225,11 @@ static int mcp251x_open(struct net_device *net)
}
mutex_lock(&priv->mcp_lock);
- mcp251x_power_enable(priv->transceiver, 1);
+ ret = mcp251x_power_enable(priv->transceiver, 1);
+ if (ret) {
+ dev_err(&spi->dev, "failed to enable transceiver power: %pe\n", ERR_PTR(ret));
+ goto out_close_candev;
+ }
priv->force_quit = 0;
priv->tx_skb = NULL;
@@ -1272,6 +1276,7 @@ out_free_irq:
mcp251x_hw_sleep(spi);
out_close:
mcp251x_power_enable(priv->transceiver, 0);
+out_close_candev:
close_candev(net);
mutex_unlock(&priv->mcp_lock);
if (release_irq)
@@ -1516,11 +1521,25 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev)
{
struct spi_device *spi = to_spi_device(dev);
struct mcp251x_priv *priv = spi_get_drvdata(spi);
+ int ret = 0;
- if (priv->after_suspend & AFTER_SUSPEND_POWER)
- mcp251x_power_enable(priv->power, 1);
- if (priv->after_suspend & AFTER_SUSPEND_UP)
- mcp251x_power_enable(priv->transceiver, 1);
+ if (priv->after_suspend & AFTER_SUSPEND_POWER) {
+ ret = mcp251x_power_enable(priv->power, 1);
+ if (ret) {
+ dev_err(dev, "failed to restore power: %pe\n", ERR_PTR(ret));
+ return ret;
+ }
+ }
+
+ if (priv->after_suspend & AFTER_SUSPEND_UP) {
+ ret = mcp251x_power_enable(priv->transceiver, 1);
+ if (ret) {
+ dev_err(dev, "failed to restore transceiver power: %pe\n", ERR_PTR(ret));
+ if (priv->after_suspend & AFTER_SUSPEND_POWER)
+ mcp251x_power_enable(priv->power, 0);
+ return ret;
+ }
+ }
if (priv->after_suspend & (AFTER_SUSPEND_POWER | AFTER_SUSPEND_UP))
queue_work(priv->wq, &priv->restart_work);
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 56cf9a926a83..95ba99b89428 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -794,18 +794,34 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
static void airoha_qdma_cleanup_rx_queue(struct airoha_queue *q)
{
- struct airoha_eth *eth = q->qdma->eth;
+ struct airoha_qdma *qdma = q->qdma;
+ struct airoha_eth *eth = qdma->eth;
+ int qid = q - &qdma->q_rx[0];
while (q->queued) {
struct airoha_queue_entry *e = &q->entry[q->tail];
+ struct airoha_qdma_desc *desc = &q->desc[q->tail];
struct page *page = virt_to_head_page(e->buf);
dma_sync_single_for_cpu(eth->dev, e->dma_addr, e->dma_len,
page_pool_get_dma_dir(q->page_pool));
page_pool_put_full_page(q->page_pool, page, false);
+ /* Reset DMA descriptor */
+ WRITE_ONCE(desc->ctrl, 0);
+ WRITE_ONCE(desc->addr, 0);
+ WRITE_ONCE(desc->data, 0);
+ WRITE_ONCE(desc->msg0, 0);
+ WRITE_ONCE(desc->msg1, 0);
+ WRITE_ONCE(desc->msg2, 0);
+ WRITE_ONCE(desc->msg3, 0);
+
q->tail = (q->tail + 1) % q->ndesc;
q->queued--;
}
+
+ q->head = q->tail;
+ airoha_qdma_rmw(qdma, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK,
+ FIELD_PREP(RX_RING_DMA_IDX_MASK, q->tail));
}
static int airoha_qdma_init_rx(struct airoha_qdma *qdma)
@@ -2946,6 +2962,8 @@ static int airoha_register_gdm_devices(struct airoha_eth *eth)
return err;
}
+ set_bit(DEV_STATE_REGISTERED, &eth->state);
+
return 0;
}
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index 20e602d61e61..a97903569335 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -88,6 +88,7 @@ enum {
enum {
DEV_STATE_INITIALIZED,
+ DEV_STATE_REGISTERED,
};
enum {
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 42dbe8f93231..c2c32b6833df 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -227,7 +227,9 @@ static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr,
if (!dev)
return -ENODEV;
+ rcu_read_lock();
err = dev_fill_forward_path(dev, addr, &stack);
+ rcu_read_unlock();
if (err)
return err;
@@ -1366,6 +1368,13 @@ int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data)
struct airoha_eth *eth = ppe->eth;
int err = 0;
+ /* Netfilter flowtable can try to offload flower rules while not all
+ * the net_devices are registered or initialized. Delay offloading
+ * until all net_devices are registered in the system.
+ */
+ if (!test_bit(DEV_STATE_REGISTERED, &eth->state))
+ return -EBUSY;
+
mutex_lock(&flow_offload_mutex);
if (!eth->npu)
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index cd7dddeb91dd..9787c1857e13 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -25,7 +25,7 @@ config B44
select SSB
select MII
select PHYLIB
- select FIXED_PHY if BCM47XX
+ select FIXED_PHY
help
If you have a network (Ethernet) controller of this type, say Y
or M here.
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index aa6d8606849f..972474893a6b 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1152,12 +1152,6 @@ void bcmasp_enable_wol(struct bcmasp_intf *intf, bool en)
}
}
-static void bcmasp_wol_irq_destroy(struct bcmasp_priv *priv)
-{
- if (priv->wol_irq > 0)
- free_irq(priv->wol_irq, priv);
-}
-
static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en)
{
u32 reg, phy_lpi_overwrite;
@@ -1255,7 +1249,7 @@ static int bcmasp_probe(struct platform_device *pdev)
if (priv->irq <= 0)
return -EINVAL;
- priv->clk = devm_clk_get_optional_enabled(dev, "sw_asp");
+ priv->clk = devm_clk_get_optional(dev, "sw_asp");
if (IS_ERR(priv->clk))
return dev_err_probe(dev, PTR_ERR(priv->clk),
"failed to request clock\n");
@@ -1283,6 +1277,10 @@ static int bcmasp_probe(struct platform_device *pdev)
bcmasp_set_pdata(priv, pdata);
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to start clock\n");
+
/* Enable all clocks to ensure successful probing */
bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0);
@@ -1294,8 +1292,10 @@ static int bcmasp_probe(struct platform_device *pdev)
ret = devm_request_irq(&pdev->dev, priv->irq, bcmasp_isr, 0,
pdev->name, priv);
- if (ret)
- return dev_err_probe(dev, ret, "failed to request ASP interrupt: %d", ret);
+ if (ret) {
+ dev_err(dev, "Failed to request ASP interrupt: %d", ret);
+ goto err_clock_disable;
+ }
/* Register mdio child nodes */
of_platform_populate(dev->of_node, bcmasp_mdio_of_match, NULL, dev);
@@ -1307,13 +1307,17 @@ static int bcmasp_probe(struct platform_device *pdev)
priv->mda_filters = devm_kcalloc(dev, priv->num_mda_filters,
sizeof(*priv->mda_filters), GFP_KERNEL);
- if (!priv->mda_filters)
- return -ENOMEM;
+ if (!priv->mda_filters) {
+ ret = -ENOMEM;
+ goto err_clock_disable;
+ }
priv->net_filters = devm_kcalloc(dev, priv->num_net_filters,
sizeof(*priv->net_filters), GFP_KERNEL);
- if (!priv->net_filters)
- return -ENOMEM;
+ if (!priv->net_filters) {
+ ret = -ENOMEM;
+ goto err_clock_disable;
+ }
bcmasp_core_init_filters(priv);
@@ -1322,7 +1326,8 @@ static int bcmasp_probe(struct platform_device *pdev)
ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports");
if (!ports_node) {
dev_warn(dev, "No ports found\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_clock_disable;
}
i = 0;
@@ -1344,8 +1349,6 @@ static int bcmasp_probe(struct platform_device *pdev)
*/
bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE);
- clk_disable_unprepare(priv->clk);
-
/* Now do the registration of the network ports which will take care
* of managing the clock properly.
*/
@@ -1358,13 +1361,16 @@ static int bcmasp_probe(struct platform_device *pdev)
count++;
}
+ clk_disable_unprepare(priv->clk);
+
dev_info(dev, "Initialized %d port(s)\n", count);
return ret;
err_cleanup:
- bcmasp_wol_irq_destroy(priv);
bcmasp_remove_intfs(priv);
+err_clock_disable:
+ clk_disable_unprepare(priv->clk);
return ret;
}
@@ -1376,7 +1382,6 @@ static void bcmasp_remove(struct platform_device *pdev)
if (!priv)
return;
- bcmasp_wol_irq_destroy(priv);
bcmasp_remove_intfs(priv);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0751c0e4581a..3f775196ef81 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8045,6 +8045,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want);
if (!ulp_msix)
bnxt_set_ulp_stat_ctxs(bp, 0);
+ else
+ bnxt_set_dflt_ulp_stat_ctxs(bp);
if (ulp_msix > bp->ulp_num_msix_want)
ulp_msix = bp->ulp_num_msix_want;
@@ -8671,7 +8673,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp)
struct hwrm_func_backing_store_qcaps_v2_output *resp;
struct hwrm_func_backing_store_qcaps_v2_input *req;
struct bnxt_ctx_mem_info *ctx = bp->ctx;
- u16 type;
+ u16 type, next_type = 0;
int rc;
rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS_V2);
@@ -8687,7 +8689,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp)
resp = hwrm_req_hold(bp, req);
- for (type = 0; type < BNXT_CTX_V2_MAX; ) {
+ for (type = 0; type < BNXT_CTX_V2_MAX; type = next_type) {
struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type];
u8 init_val, init_off, i;
u32 max_entries;
@@ -8700,7 +8702,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp)
if (rc)
goto ctx_done;
flags = le32_to_cpu(resp->flags);
- type = le16_to_cpu(resp->next_valid_type);
+ next_type = le16_to_cpu(resp->next_valid_type);
if (!(flags & BNXT_CTX_MEM_TYPE_VALID)) {
bnxt_free_one_ctx_mem(bp, ctxm, true);
continue;
@@ -8715,7 +8717,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp)
else
continue;
}
- ctxm->type = le16_to_cpu(resp->type);
+ ctxm->type = type;
ctxm->entry_size = entry_size;
ctxm->flags = flags;
ctxm->instance_bmap = le32_to_cpu(resp->instance_bit_map);
@@ -12992,6 +12994,21 @@ static int bnxt_tx_nr_rings_per_tc(struct bnxt *bp)
return bp->num_tc ? bp->tx_nr_rings / bp->num_tc : bp->tx_nr_rings;
}
+static void bnxt_set_xdp_tx_rings(struct bnxt *bp)
+{
+ bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc;
+ bp->tx_nr_rings += bp->tx_nr_rings_xdp;
+}
+
+static void bnxt_adj_tx_rings(struct bnxt *bp)
+{
+ /* Make adjustments if reserved TX rings are less than requested */
+ bp->tx_nr_rings -= bp->tx_nr_rings_xdp;
+ bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp);
+ if (bp->tx_nr_rings_xdp)
+ bnxt_set_xdp_tx_rings(bp);
+}
+
static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
{
int rc = 0;
@@ -13009,13 +13026,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
if (rc)
return rc;
- /* Make adjustments if reserved TX rings are less than requested */
- bp->tx_nr_rings -= bp->tx_nr_rings_xdp;
- bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp);
- if (bp->tx_nr_rings_xdp) {
- bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc;
- bp->tx_nr_rings += bp->tx_nr_rings_xdp;
- }
+ bnxt_adj_tx_rings(bp);
rc = bnxt_alloc_mem(bp, irq_re_init);
if (rc) {
netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc);
@@ -15436,11 +15447,19 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
+void bnxt_set_cp_rings(struct bnxt *bp, bool sh)
+{
+ int tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
+
+ bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) :
+ tx_cp + bp->rx_nr_rings;
+}
+
int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
{
struct bnxt *bp = netdev_priv(dev);
bool sh = false;
- int rc, tx_cp;
+ int rc;
if (tc > bp->max_tc) {
netdev_err(dev, "Too many traffic classes requested: %d. Max supported is %d.\n",
@@ -15473,9 +15492,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
bp->num_tc = 0;
}
bp->tx_nr_rings += bp->tx_nr_rings_xdp;
- tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
- bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) :
- tx_cp + bp->rx_nr_rings;
+ bnxt_set_cp_rings(bp, sh);
if (netif_running(bp->dev))
return bnxt_open_nic(bp, true, false);
@@ -16525,6 +16542,19 @@ static void bnxt_trim_dflt_sh_rings(struct bnxt *bp)
bp->tx_nr_rings = bnxt_tx_nr_rings(bp);
}
+static void bnxt_adj_dflt_rings(struct bnxt *bp, bool sh)
+{
+ if (sh)
+ bnxt_trim_dflt_sh_rings(bp);
+ else
+ bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings;
+ bp->tx_nr_rings = bnxt_tx_nr_rings(bp);
+ if (sh && READ_ONCE(bp->xdp_prog)) {
+ bnxt_set_xdp_tx_rings(bp);
+ bnxt_set_cp_rings(bp, true);
+ }
+}
+
static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
{
int dflt_rings, max_rx_rings, max_tx_rings, rc;
@@ -16550,11 +16580,8 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
return rc;
bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings);
bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings);
- if (sh)
- bnxt_trim_dflt_sh_rings(bp);
- else
- bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings;
- bp->tx_nr_rings = bnxt_tx_nr_rings(bp);
+
+ bnxt_adj_dflt_rings(bp, sh);
avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings;
if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) {
@@ -16567,16 +16594,17 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
rc = __bnxt_reserve_rings(bp);
if (rc && rc != -ENODEV)
netdev_warn(bp->dev, "Unable to reserve tx rings\n");
- bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp);
+
+ bnxt_adj_tx_rings(bp);
if (sh)
- bnxt_trim_dflt_sh_rings(bp);
+ bnxt_adj_dflt_rings(bp, true);
/* Rings may have been trimmed, re-reserve the trimmed rings. */
if (bnxt_need_reserve_rings(bp)) {
rc = __bnxt_reserve_rings(bp);
if (rc && rc != -ENODEV)
netdev_warn(bp->dev, "2nd rings reservation failed.\n");
- bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp);
+ bnxt_adj_tx_rings(bp);
}
if (BNXT_CHIP_TYPE_NITRO_A0(bp)) {
bp->rx_nr_rings++;
@@ -16610,7 +16638,7 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
if (rc)
goto init_dflt_ring_err;
- bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp);
+ bnxt_adj_tx_rings(bp);
bnxt_set_dflt_rfs(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index a97d651130df..4bc7f7aeaab3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2985,6 +2985,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
int tx_xdp);
int bnxt_fw_init_one(struct bnxt *bp);
bool bnxt_hwrm_reset_permitted(struct bnxt *bp);
+void bnxt_set_cp_rings(struct bnxt *bp, bool sh);
int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
struct bnxt_ntuple_filter *bnxt_lookup_ntp_filter_from_idx(struct bnxt *bp,
struct bnxt_ntuple_filter *fltr, u32 idx);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 28d0ece2e7b1..0407aa1b3190 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -945,7 +945,6 @@ static int bnxt_set_channels(struct net_device *dev,
bool sh = false;
int tx_xdp = 0;
int rc = 0;
- int tx_cp;
if (channel->other_count)
return -EINVAL;
@@ -1013,9 +1012,7 @@ static int bnxt_set_channels(struct net_device *dev,
if (tcs > 1)
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp;
- tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
- bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) :
- tx_cp + bp->rx_nr_rings;
+ bnxt_set_cp_rings(bp, sh);
/* After changing number of rx channels, update NTUPLE feature. */
netdev_update_features(dev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 85cbeb35681c..bebe37e139c9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -384,7 +384,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
{
struct net_device *dev = bp->dev;
- int tx_xdp = 0, tx_cp, rc, tc;
+ int tx_xdp = 0, rc, tc;
struct bpf_prog *old;
netdev_assert_locked(dev);
@@ -431,8 +431,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
}
bp->tx_nr_rings_xdp = tx_xdp;
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
- tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
- bp->cp_nr_rings = max_t(int, tx_cp, bp->rx_nr_rings);
+ bnxt_set_cp_rings(bp, true);
bnxt_set_tpa_flags(bp);
bnxt_set_ring_params(bp);
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 21a5dd342724..73a4b569b03e 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12299,7 +12299,7 @@ static int tg3_get_link_ksettings(struct net_device *dev,
ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
advertising);
- if (netif_running(dev) && tp->link_up) {
+ if (netif_running(dev) && netif_carrier_ok(dev)) {
cmd->base.speed = tp->link_config.active_speed;
cmd->base.duplex = tp->link_config.active_duplex;
ethtool_convert_legacy_u32_to_link_mode(
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index c16ac9c76aa3..99e7d5cf3786 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1071,7 +1071,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budge
}
if (tx_skb->skb) {
- napi_consume_skb(tx_skb->skb, budget);
+ dev_consume_skb_any(tx_skb->skb);
tx_skb->skb = NULL;
}
}
@@ -3224,7 +3224,7 @@ static void gem_get_ethtool_stats(struct net_device *dev,
spin_lock_irq(&bp->stats_lock);
gem_update_stats(bp);
memcpy(data, &bp->ethtool_stats, sizeof(u64)
- * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES));
+ * (GEM_STATS_LEN + QUEUE_STATS_LEN * bp->num_queues));
spin_unlock_irq(&bp->stats_lock);
}
@@ -5776,9 +5776,9 @@ static int __maybe_unused macb_suspend(struct device *dev)
struct macb_queue *queue;
struct in_device *idev;
unsigned long flags;
+ u32 tmp, ifa_local;
unsigned int q;
int err;
- u32 tmp;
if (!device_may_wakeup(&bp->dev->dev))
phy_exit(bp->phy);
@@ -5787,14 +5787,21 @@ static int __maybe_unused macb_suspend(struct device *dev)
return 0;
if (bp->wol & MACB_WOL_ENABLED) {
- /* Check for IP address in WOL ARP mode */
- idev = __in_dev_get_rcu(bp->dev);
- if (idev)
- ifa = rcu_dereference(idev->ifa_list);
- if ((bp->wolopts & WAKE_ARP) && !ifa) {
- netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n");
- return -EOPNOTSUPP;
+ if (bp->wolopts & WAKE_ARP) {
+ /* Check for IP address in WOL ARP mode */
+ rcu_read_lock();
+ idev = __in_dev_get_rcu(bp->dev);
+ if (idev)
+ ifa = rcu_dereference(idev->ifa_list);
+ if (!ifa) {
+ rcu_read_unlock();
+ netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n");
+ return -EOPNOTSUPP;
+ }
+ ifa_local = be32_to_cpu(ifa->ifa_local);
+ rcu_read_unlock();
}
+
spin_lock_irqsave(&bp->lock, flags);
/* Disable Tx and Rx engines before disabling the queues,
@@ -5833,8 +5840,9 @@ static int __maybe_unused macb_suspend(struct device *dev)
if (bp->wolopts & WAKE_ARP) {
tmp |= MACB_BIT(ARP);
/* write IP address into register */
- tmp |= MACB_BFEXT(IP, be32_to_cpu(ifa->ifa_local));
+ tmp |= MACB_BFEXT(IP, ifa_local);
}
+ spin_unlock_irqrestore(&bp->lock, flags);
/* Change interrupt handler and
* Enable WoL IRQ on queue 0
@@ -5847,11 +5855,12 @@ static int __maybe_unused macb_suspend(struct device *dev)
dev_err(dev,
"Unable to request IRQ %d (error %d)\n",
bp->queues[0].irq, err);
- spin_unlock_irqrestore(&bp->lock, flags);
return err;
}
+ spin_lock_irqsave(&bp->lock, flags);
queue_writel(bp->queues, IER, GEM_BIT(WOL));
gem_writel(bp, WOL, tmp);
+ spin_unlock_irqrestore(&bp->lock, flags);
} else {
err = devm_request_irq(dev, bp->queues[0].irq, macb_wol_interrupt,
IRQF_SHARED, netdev->name, bp->queues);
@@ -5859,13 +5868,13 @@ static int __maybe_unused macb_suspend(struct device *dev)
dev_err(dev,
"Unable to request IRQ %d (error %d)\n",
bp->queues[0].irq, err);
- spin_unlock_irqrestore(&bp->lock, flags);
return err;
}
+ spin_lock_irqsave(&bp->lock, flags);
queue_writel(bp->queues, IER, MACB_BIT(WOL));
macb_writel(bp, WOL, tmp);
+ spin_unlock_irqrestore(&bp->lock, flags);
}
- spin_unlock_irqrestore(&bp->lock, flags);
enable_irq_wake(bp->queues[0].irq);
}
@@ -5932,6 +5941,8 @@ static int __maybe_unused macb_resume(struct device *dev)
queue_readl(bp->queues, ISR);
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(bp->queues, ISR, -1);
+ spin_unlock_irqrestore(&bp->lock, flags);
+
/* Replace interrupt handler on queue 0 */
devm_free_irq(dev, bp->queues[0].irq, bp->queues);
err = devm_request_irq(dev, bp->queues[0].irq, macb_interrupt,
@@ -5940,10 +5951,8 @@ static int __maybe_unused macb_resume(struct device *dev)
dev_err(dev,
"Unable to request IRQ %d (error %d)\n",
bp->queues[0].irq, err);
- spin_unlock_irqrestore(&bp->lock, flags);
return err;
}
- spin_unlock_irqrestore(&bp->lock, flags);
disable_irq_wake(bp->queues[0].irq);
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index fc4f5aee6ab3..b79dec17e6b0 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -96,10 +96,10 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
err_plat_dev_register:
- clk_unregister(plat_data.hclk);
+ clk_unregister_fixed_rate(plat_data.hclk);
err_hclk_register:
- clk_unregister(plat_data.pclk);
+ clk_unregister_fixed_rate(plat_data.pclk);
err_pclk_register:
return err;
@@ -109,10 +109,12 @@ static void macb_remove(struct pci_dev *pdev)
{
struct platform_device *plat_dev = pci_get_drvdata(pdev);
struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev);
+ struct clk *pclk = plat_data->pclk;
+ struct clk *hclk = plat_data->hclk;
- clk_unregister(plat_data->pclk);
- clk_unregister(plat_data->hclk);
platform_device_unregister(plat_dev);
+ clk_unregister_fixed_rate(pclk);
+ clk_unregister_fixed_rate(hclk);
}
static const struct pci_device_id dev_id_table[] = {
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 1e91e79c8134..6d2fe5c2f390 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -977,19 +977,19 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv)
priv->tx_skbs = kcalloc(MAX_TX_QUEUE_ENTRIES, sizeof(void *),
GFP_KERNEL);
if (!priv->tx_skbs)
- return -ENOMEM;
+ goto err_free_rx_skbs;
/* Allocate descriptors */
priv->rxdes = dma_alloc_coherent(priv->dev,
MAX_RX_QUEUE_ENTRIES * sizeof(struct ftgmac100_rxdes),
&priv->rxdes_dma, GFP_KERNEL);
if (!priv->rxdes)
- return -ENOMEM;
+ goto err_free_tx_skbs;
priv->txdes = dma_alloc_coherent(priv->dev,
MAX_TX_QUEUE_ENTRIES * sizeof(struct ftgmac100_txdes),
&priv->txdes_dma, GFP_KERNEL);
if (!priv->txdes)
- return -ENOMEM;
+ goto err_free_rxdes;
/* Allocate scratch packet buffer */
priv->rx_scratch = dma_alloc_coherent(priv->dev,
@@ -997,9 +997,29 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv)
&priv->rx_scratch_dma,
GFP_KERNEL);
if (!priv->rx_scratch)
- return -ENOMEM;
+ goto err_free_txdes;
return 0;
+
+err_free_txdes:
+ dma_free_coherent(priv->dev,
+ MAX_TX_QUEUE_ENTRIES *
+ sizeof(struct ftgmac100_txdes),
+ priv->txdes, priv->txdes_dma);
+ priv->txdes = NULL;
+err_free_rxdes:
+ dma_free_coherent(priv->dev,
+ MAX_RX_QUEUE_ENTRIES *
+ sizeof(struct ftgmac100_rxdes),
+ priv->rxdes, priv->rxdes_dma);
+ priv->rxdes = NULL;
+err_free_tx_skbs:
+ kfree(priv->tx_skbs);
+ priv->tx_skbs = NULL;
+err_free_rx_skbs:
+ kfree(priv->rx_skbs);
+ priv->rx_skbs = NULL;
+ return -ENOMEM;
}
static void ftgmac100_init_rings(struct ftgmac100 *priv)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index a146ceaf2ed6..aa8a87124b10 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2578,6 +2578,7 @@ EXPORT_SYMBOL_GPL(enetc_free_si_resources);
static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
{
+ struct enetc_si *si = container_of(hw, struct enetc_si, hw);
int idx = tx_ring->index;
u32 tbmr;
@@ -2591,10 +2592,20 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
enetc_txbdr_wr(hw, idx, ENETC_TBLENR,
ENETC_RTBLENR_LEN(tx_ring->bd_count));
- /* clearing PI/CI registers for Tx not supported, adjust sw indexes */
+ /* For ENETC v1, clearing PI/CI registers for Tx not supported,
+ * adjust sw indexes
+ */
tx_ring->next_to_use = enetc_txbdr_rd(hw, idx, ENETC_TBPIR);
tx_ring->next_to_clean = enetc_txbdr_rd(hw, idx, ENETC_TBCIR);
+ if (tx_ring->next_to_use != tx_ring->next_to_clean &&
+ !is_enetc_rev1(si)) {
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+ enetc_txbdr_wr(hw, idx, ENETC_TBPIR, 0);
+ enetc_txbdr_wr(hw, idx, ENETC_TBCIR, 0);
+ }
+
/* enable Tx ints by setting pkt thr to 1 */
enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
index 3ed0f7a02767..719c88ceb801 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
@@ -134,6 +134,12 @@
/* Port operational register */
#define ENETC4_POR 0x4100
+#define POR_TXDIS BIT(0)
+#define POR_RXDIS BIT(1)
+
+/* Port status register */
+#define ENETC4_PSR 0x4104
+#define PSR_RX_BUSY BIT(1)
/* Port traffic class a transmit maximum SDU register */
#define ENETC4_PTCTMSDUR(a) ((a) * 0x20 + 0x4208)
@@ -173,6 +179,11 @@
/* Port internal MDIO base address, use to access PCS */
#define ENETC4_PM_IMDIO_BASE 0x5030
+/* Port MAC 0/1 Interrupt Event Register */
+#define ENETC4_PM_IEVENT(mac) (0x5040 + (mac) * 0x400)
+#define PM_IEVENT_TX_EMPTY BIT(5)
+#define PM_IEVENT_RX_EMPTY BIT(6)
+
/* Port MAC 0/1 Pause Quanta Register */
#define ENETC4_PM_PAUSE_QUANTA(mac) (0x5054 + (mac) * 0x400)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
index 689b9f13c5eb..56899f2254aa 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
@@ -444,20 +444,11 @@ static void enetc4_set_trx_frame_size(struct enetc_pf *pf)
enetc4_pf_reset_tc_msdu(&si->hw);
}
-static void enetc4_enable_trx(struct enetc_pf *pf)
-{
- struct enetc_hw *hw = &pf->si->hw;
-
- /* Enable port transmit/receive */
- enetc_port_wr(hw, ENETC4_POR, 0);
-}
-
static void enetc4_configure_port(struct enetc_pf *pf)
{
enetc4_configure_port_si(pf);
enetc4_set_trx_frame_size(pf);
enetc_set_default_rss_key(pf);
- enetc4_enable_trx(pf);
}
static int enetc4_init_ntmp_user(struct enetc_si *si)
@@ -801,15 +792,112 @@ static void enetc4_set_tx_pause(struct enetc_pf *pf, int num_rxbdr, bool tx_paus
enetc_port_wr(hw, ENETC4_PPAUOFFTR, pause_off_thresh);
}
-static void enetc4_enable_mac(struct enetc_pf *pf, bool en)
+static void enetc4_mac_wait_tx_empty(struct enetc_si *si, int mac)
+{
+ u32 val;
+
+ if (read_poll_timeout(enetc_port_rd, val,
+ val & PM_IEVENT_TX_EMPTY,
+ 100, 10000, false, &si->hw,
+ ENETC4_PM_IEVENT(mac)))
+ dev_warn(&si->pdev->dev,
+ "MAC %d TX is not empty\n", mac);
+}
+
+static void enetc4_mac_tx_graceful_stop(struct enetc_pf *pf)
+{
+ struct enetc_hw *hw = &pf->si->hw;
+ struct enetc_si *si = pf->si;
+ u32 val;
+
+ val = enetc_port_rd(hw, ENETC4_POR);
+ val |= POR_TXDIS;
+ enetc_port_wr(hw, ENETC4_POR, val);
+
+ if (enetc_is_pseudo_mac(si))
+ return;
+
+ enetc4_mac_wait_tx_empty(si, 0);
+ if (si->hw_features & ENETC_SI_F_QBU)
+ enetc4_mac_wait_tx_empty(si, 1);
+
+ val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0));
+ val &= ~PM_CMD_CFG_TX_EN;
+ enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val);
+}
+
+static void enetc4_mac_tx_enable(struct enetc_pf *pf)
{
+ struct enetc_hw *hw = &pf->si->hw;
struct enetc_si *si = pf->si;
u32 val;
val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0));
- val &= ~(PM_CMD_CFG_TX_EN | PM_CMD_CFG_RX_EN);
- val |= en ? (PM_CMD_CFG_TX_EN | PM_CMD_CFG_RX_EN) : 0;
+ val |= PM_CMD_CFG_TX_EN;
+ enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val);
+ val = enetc_port_rd(hw, ENETC4_POR);
+ val &= ~POR_TXDIS;
+ enetc_port_wr(hw, ENETC4_POR, val);
+}
+
+static void enetc4_mac_wait_rx_empty(struct enetc_si *si, int mac)
+{
+ u32 val;
+
+ if (read_poll_timeout(enetc_port_rd, val,
+ val & PM_IEVENT_RX_EMPTY,
+ 100, 10000, false, &si->hw,
+ ENETC4_PM_IEVENT(mac)))
+ dev_warn(&si->pdev->dev,
+ "MAC %d RX is not empty\n", mac);
+}
+
+static void enetc4_mac_rx_graceful_stop(struct enetc_pf *pf)
+{
+ struct enetc_hw *hw = &pf->si->hw;
+ struct enetc_si *si = pf->si;
+ u32 val;
+
+ if (enetc_is_pseudo_mac(si))
+ goto check_rx_busy;
+
+ if (si->hw_features & ENETC_SI_F_QBU) {
+ val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(1));
+ val &= ~PM_CMD_CFG_RX_EN;
+ enetc_port_wr(hw, ENETC4_PM_CMD_CFG(1), val);
+ enetc4_mac_wait_rx_empty(si, 1);
+ }
+
+ val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(0));
+ val &= ~PM_CMD_CFG_RX_EN;
+ enetc_port_wr(hw, ENETC4_PM_CMD_CFG(0), val);
+ enetc4_mac_wait_rx_empty(si, 0);
+
+check_rx_busy:
+ if (read_poll_timeout(enetc_port_rd, val,
+ !(val & PSR_RX_BUSY),
+ 100, 10000, false, hw,
+ ENETC4_PSR))
+ dev_warn(&si->pdev->dev, "Port RX busy\n");
+
+ val = enetc_port_rd(hw, ENETC4_POR);
+ val |= POR_RXDIS;
+ enetc_port_wr(hw, ENETC4_POR, val);
+}
+
+static void enetc4_mac_rx_enable(struct enetc_pf *pf)
+{
+ struct enetc_hw *hw = &pf->si->hw;
+ struct enetc_si *si = pf->si;
+ u32 val;
+
+ val = enetc_port_rd(hw, ENETC4_POR);
+ val &= ~POR_RXDIS;
+ enetc_port_wr(hw, ENETC4_POR, val);
+
+ val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0));
+ val |= PM_CMD_CFG_RX_EN;
enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val);
}
@@ -853,7 +941,8 @@ static void enetc4_pl_mac_link_up(struct phylink_config *config,
enetc4_set_hd_flow_control(pf, hd_fc);
enetc4_set_tx_pause(pf, priv->num_rx_rings, tx_pause);
enetc4_set_rx_pause(pf, rx_pause);
- enetc4_enable_mac(pf, true);
+ enetc4_mac_tx_enable(pf);
+ enetc4_mac_rx_enable(pf);
}
static void enetc4_pl_mac_link_down(struct phylink_config *config,
@@ -862,7 +951,8 @@ static void enetc4_pl_mac_link_down(struct phylink_config *config,
{
struct enetc_pf *pf = phylink_to_enetc_pf(config);
- enetc4_enable_mac(pf, false);
+ enetc4_mac_rx_graceful_stop(pf);
+ enetc4_mac_tx_graceful_stop(pf);
}
static const struct phylink_mac_ops enetc_pl_mac_ops = {
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index fed89d4f1e1d..7c17acaf7a38 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -795,9 +795,17 @@ static int enetc_set_rxfh(struct net_device *ndev,
struct enetc_si *si = priv->si;
int err = 0;
+ if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
+ rxfh->hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+
/* set hash key, if PF */
- if (rxfh->key && enetc_si_is_pf(si))
+ if (rxfh->key) {
+ if (!enetc_si_is_pf(si))
+ return -EOPNOTSUPP;
+
enetc_set_rss_key(si, rxfh->key);
+ }
/* set RSS table */
if (rxfh->indir)
@@ -813,6 +821,8 @@ static void enetc_get_ringparam(struct net_device *ndev,
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ ring->rx_max_pending = priv->rx_bd_count;
+ ring->tx_max_pending = priv->tx_bd_count;
ring->rx_pending = priv->rx_bd_count;
ring->tx_pending = priv->tx_bd_count;
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 4b7bad9a485d..56801c2009d5 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -545,9 +545,6 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp,
if (rq->perout.flags)
return -EOPNOTSUPP;
- if (rq->perout.index != fep->pps_channel)
- return -EOPNOTSUPP;
-
period.tv_sec = rq->perout.period.sec;
period.tv_nsec = rq->perout.period.nsec;
period_ns = timespec64_to_ns(&period);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index ab67c709d5a0..1cd1f3f2930a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -313,14 +313,13 @@ static int iavf_get_sset_count(struct net_device *netdev, int sset)
{
/* Report the maximum number queues, even if not every queue is
* currently configured. Since allocation of queues is in pairs,
- * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set
- * at device creation and never changes.
+ * use netdev->num_tx_queues * 2. The num_tx_queues is set at
+ * device creation and never changes.
*/
if (sset == ETH_SS_STATS)
return IAVF_STATS_LEN +
- (IAVF_QUEUE_STATS_LEN * 2 *
- netdev->real_num_tx_queues);
+ (IAVF_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues);
else
return -EINVAL;
}
@@ -345,19 +344,19 @@ static void iavf_get_ethtool_stats(struct net_device *netdev,
iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
rcu_read_lock();
- /* As num_active_queues describe both tx and rx queues, we can use
- * it to iterate over rings' stats.
+ /* Use num_tx_queues to report stats for the maximum number of queues.
+ * Queues beyond num_active_queues will report zero.
*/
- for (i = 0; i < adapter->num_active_queues; i++) {
- struct iavf_ring *ring;
+ for (i = 0; i < netdev->num_tx_queues; i++) {
+ struct iavf_ring *tx_ring = NULL, *rx_ring = NULL;
- /* Tx rings stats */
- ring = &adapter->tx_rings[i];
- iavf_add_queue_stats(&data, ring);
+ if (i < adapter->num_active_queues) {
+ tx_ring = &adapter->tx_rings[i];
+ rx_ring = &adapter->rx_rings[i];
+ }
- /* Rx rings stats */
- ring = &adapter->rx_rings[i];
- iavf_add_queue_stats(&data, ring);
+ iavf_add_queue_stats(&data, tx_ring);
+ iavf_add_queue_stats(&data, rx_ring);
}
rcu_read_unlock();
}
@@ -376,9 +375,9 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data)
iavf_add_stat_strings(&data, iavf_gstrings_stats);
/* Queues are always allocated in pairs, so we just use
- * real_num_tx_queues for both Tx and Rx queues.
+ * num_tx_queues for both Tx and Rx queues.
*/
- for (i = 0; i < netdev->real_num_tx_queues; i++) {
+ for (i = 0; i < netdev->num_tx_queues; i++) {
iavf_add_stat_strings(&data, iavf_gstrings_queue_stats,
"tx", i);
iavf_add_stat_strings(&data, iavf_gstrings_queue_stats,
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 2b2b22af42be..eb3a48330cc1 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -840,6 +840,28 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
}
/**
+ * ice_get_max_txq - return the maximum number of Tx queues for in a PF
+ * @pf: PF structure
+ *
+ * Return: maximum number of Tx queues
+ */
+static inline int ice_get_max_txq(struct ice_pf *pf)
+{
+ return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
+ * @pf: PF structure
+ *
+ * Return: maximum number of Rx queues
+ */
+static inline int ice_get_max_rxq(struct ice_pf *pf)
+{
+ return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
* ice_get_main_vsi - Get the PF VSI
* @pf: PF instance
*
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 301947d53ede..e6a20af6f63d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1930,6 +1930,17 @@ __ice_get_ethtool_stats(struct net_device *netdev,
int i = 0;
char *p;
+ if (ice_is_port_repr_netdev(netdev)) {
+ ice_update_eth_stats(vsi);
+
+ for (j = 0; j < ICE_VSI_STATS_LEN; j++) {
+ p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset;
+ data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ return;
+ }
+
ice_update_pf_stats(pf);
ice_update_vsi_stats(vsi);
@@ -1939,9 +1950,6 @@ __ice_get_ethtool_stats(struct net_device *netdev,
sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
}
- if (ice_is_port_repr_netdev(netdev))
- return;
-
/* populate per queue stats */
rcu_read_lock();
@@ -3774,24 +3782,6 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info)
}
/**
- * ice_get_max_txq - return the maximum number of Tx queues for in a PF
- * @pf: PF structure
- */
-static int ice_get_max_txq(struct ice_pf *pf)
-{
- return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq);
-}
-
-/**
- * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
- * @pf: PF structure
- */
-static int ice_get_max_rxq(struct ice_pf *pf)
-{
- return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq);
-}
-
-/**
* ice_get_combined_cnt - return the current number of combined channels
* @vsi: PF VSI pointer
*
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index e7308e381e2f..3c36e3641b9e 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4699,8 +4699,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
struct net_device *netdev;
u8 mac_addr[ETH_ALEN];
- netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
- vsi->alloc_rxq);
+ netdev = alloc_etherdev_mqs(sizeof(*np), ice_get_max_txq(vsi->back),
+ ice_get_max_rxq(vsi->back));
if (!netdev)
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c
index 90f99443a922..096566c697f4 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.c
+++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2019-2021, Intel Corporation. */
#include "ice.h"
+#include "ice_lib.h"
#include "ice_eswitch.h"
#include "devlink/devlink.h"
#include "devlink/port.h"
@@ -67,7 +68,7 @@ ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
return;
vsi = repr->src_vsi;
- ice_update_vsi_stats(vsi);
+ ice_update_eth_stats(vsi);
eth_stats = &vsi->eth_stats;
stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast +
@@ -315,7 +316,7 @@ ice_repr_reg_netdev(struct net_device *netdev, const struct net_device_ops *ops)
static int ice_repr_ready_vf(struct ice_repr *repr)
{
- return !ice_check_vf_ready_for_cfg(repr->vf);
+ return ice_check_vf_ready_for_cfg(repr->vf);
}
static int ice_repr_ready_sf(struct ice_repr *repr)
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
index b206fba092c8..ec1b75f039bb 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -1066,7 +1066,7 @@ bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val);
int idpf_idc_init(struct idpf_adapter *adapter);
int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter,
enum iidc_function_type ftype);
-void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info);
+void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter);
void idpf_idc_deinit_vport_aux_device(struct iidc_rdma_vport_dev_info *vdev_info);
void idpf_idc_issue_reset_event(struct iidc_rdma_core_dev_info *cdev_info);
void idpf_idc_vdev_mtu_event(struct iidc_rdma_vport_dev_info *vdev_info,
diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c
index bd4785fb8d3e..7e4f4ac92653 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_idc.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c
@@ -470,10 +470,11 @@ err_privd_alloc:
/**
* idpf_idc_deinit_core_aux_device - de-initialize Auxiliary Device(s)
- * @cdev_info: IDC core device info pointer
+ * @adapter: driver private data structure
*/
-void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info)
+void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter)
{
+ struct iidc_rdma_core_dev_info *cdev_info = adapter->cdev_info;
struct iidc_rdma_priv_dev_info *privd;
if (!cdev_info)
@@ -485,6 +486,7 @@ void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info)
kfree(privd->mapped_mem_regions);
kfree(privd);
kfree(cdev_info);
+ adapter->cdev_info = NULL;
}
/**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 252259993022..f6b3b15364ff 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -1860,13 +1860,13 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport,
idpf_queue_assign(HSPLIT_EN, q, hs);
idpf_queue_assign(RSC_EN, q, rsc);
- bufq_set->num_refillqs = num_rxq;
bufq_set->refillqs = kcalloc(num_rxq, swq_size,
GFP_KERNEL);
if (!bufq_set->refillqs) {
err = -ENOMEM;
goto err_alloc;
}
+ bufq_set->num_refillqs = num_rxq;
for (unsigned int k = 0; k < bufq_set->num_refillqs; k++) {
struct idpf_sw_queue *refillq =
&bufq_set->refillqs[k];
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index d5a877e1fef8..113ecfc16dd7 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -3668,7 +3668,7 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter)
idpf_ptp_release(adapter);
idpf_deinit_task(adapter);
- idpf_idc_deinit_core_aux_device(adapter->cdev_info);
+ idpf_idc_deinit_core_aux_device(adapter);
idpf_rel_rx_pt_lkup(adapter);
idpf_intr_rel(adapter);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index cb30108f2bf6..cc8c4ef8038f 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -244,6 +244,25 @@ out:
return 0;
}
+static bool
+mtk_flow_is_valid_idev(const struct mtk_eth *eth, const struct net_device *idev)
+{
+ size_t i;
+
+ if (!idev)
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(eth->netdev); i++) {
+ if (!eth->netdev[i])
+ continue;
+
+ if (idev->netdev_ops == eth->netdev[i]->netdev_ops)
+ return true;
+ }
+
+ return false;
+}
+
static int
mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f,
int ppe_index)
@@ -270,7 +289,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f,
flow_rule_match_meta(rule, &match);
if (mtk_is_netsys_v2_or_greater(eth)) {
idev = __dev_get_by_index(&init_net, match.key->ingress_ifindex);
- if (idev && idev->netdev_ops == eth->netdev[0]->netdev_ops) {
+ if (mtk_flow_is_valid_idev(eth, idev)) {
struct mtk_mac *mac = netdev_priv(idev);
if (WARN_ON(mac->ppe_idx >= eth->soc->ppe_num))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 6698ac55a4bf..73cf0321bb86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -107,9 +107,7 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
if (err)
return err;
- err = mlx5_fw_version_query(dev, &running_fw, &stored_fw);
- if (err)
- return err;
+ mlx5_fw_version_query(dev, &running_fw, &stored_fw);
snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7a9ee36b8dca..01f6aecc4fcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3761,6 +3761,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
return 0;
err_vports:
+ /* rollback to legacy, indicates don't unregister the uplink netdev */
+ esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY;
mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK);
err_uplink:
esw_offloads_steering_cleanup(esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index eeb4437975f2..c1f220e5fe18 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -822,48 +822,63 @@ mlx5_fw_image_pending(struct mlx5_core_dev *dev,
return 0;
}
-int mlx5_fw_version_query(struct mlx5_core_dev *dev,
- u32 *running_ver, u32 *pending_ver)
+void mlx5_fw_version_query(struct mlx5_core_dev *dev,
+ u32 *running_ver, u32 *pending_ver)
{
u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {};
bool pending_version_exists;
int component_index;
int err;
+ *running_ver = 0;
+ *pending_ver = 0;
+
if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) ||
!MLX5_CAP_MCAM_REG(dev, mcqs)) {
mlx5_core_warn(dev, "fw query isn't supported by the FW\n");
- return -EOPNOTSUPP;
+ return;
}
component_index = mlx5_get_boot_img_component_index(dev);
- if (component_index < 0)
- return component_index;
+ if (component_index < 0) {
+ mlx5_core_warn(dev, "fw query failed to find boot img component index, err %d\n",
+ component_index);
+ return;
+ }
+ *running_ver = U32_MAX; /* indicate failure */
err = mlx5_reg_mcqi_version_query(dev, component_index,
MCQI_FW_RUNNING_VERSION,
reg_mcqi_version);
- if (err)
- return err;
-
- *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
-
+ if (!err)
+ *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version,
+ version);
+ else
+ mlx5_core_warn(dev, "failed to query running version, err %d\n",
+ err);
+
+ *pending_ver = U32_MAX; /* indicate failure */
err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists);
- if (err)
- return err;
+ if (err) {
+ mlx5_core_warn(dev, "failed to query pending image, err %d\n",
+ err);
+ return;
+ }
if (!pending_version_exists) {
*pending_ver = 0;
- return 0;
+ return;
}
err = mlx5_reg_mcqi_version_query(dev, component_index,
MCQI_FW_STORED_VERSION,
reg_mcqi_version);
- if (err)
- return err;
-
- *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
-
- return 0;
+ if (!err)
+ *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version,
+ version);
+ else
+ mlx5_core_warn(dev, "failed to query pending version, err %d\n",
+ err);
+
+ return;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
index 62b6faa4276a..b8d5f6a44d26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -160,8 +160,11 @@ DEFINE_SHOW_ATTRIBUTE(members);
void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
{
+ struct mlx5_lag *ldev = mlx5_lag_dev(dev);
struct dentry *dbg;
+ if (!ldev)
+ return;
dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev));
dev->priv.dbg.lag_debugfs = dbg;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index b635b423d972..1507e881d962 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -393,8 +393,8 @@ int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw,
struct netlink_ext_ack *extack);
-int mlx5_fw_version_query(struct mlx5_core_dev *dev,
- u32 *running_ver, u32 *stored_ver);
+void mlx5_fw_version_query(struct mlx5_core_dev *dev, u32 *running_ver,
+ u32 *stored_ver);
#ifdef CONFIG_MLX5_CORE_EN
int mlx5e_init(void);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
index 08270db2dee8..3c4563c8f403 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
@@ -197,7 +197,7 @@ static int fbnic_dbg_bdq_desc_seq_show(struct seq_file *s, void *v)
return 0;
}
- for (i = 0; i <= ring->size_mask; i++) {
+ for (i = 0; i < (ring->size_mask + 1) * FBNIC_BD_FRAG_COUNT; i++) {
u64 bd = le64_to_cpu(ring->desc[i]);
seq_printf(s, "%04x %#04llx %#014llx\n", i,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index 9fb91d4f3971..9cd85a0d0c3a 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -927,7 +927,7 @@ static void fbnic_fill_bdq(struct fbnic_ring *bdq)
/* Force DMA writes to flush before writing to tail */
dma_wmb();
- writel(i, bdq->doorbell);
+ writel(i * FBNIC_BD_FRAG_COUNT, bdq->doorbell);
}
}
@@ -2564,7 +2564,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
hpq->tail = 0;
hpq->head = 0;
- log_size = fls(hpq->size_mask);
+ log_size = fls(hpq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT);
/* Store descriptor ring address and size */
fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma));
@@ -2576,7 +2576,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
if (!ppq->size_mask)
goto write_ctl;
- log_size = fls(ppq->size_mask);
+ log_size = fls(ppq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT);
/* Add enabling of PPQ to BDQ control */
bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index 980965274079..e03c9d2c38dc 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -38,7 +38,7 @@ struct fbnic_net;
#define FBNIC_MAX_XDPQS 128u
/* These apply to TWQs, TCQ, RCQ */
-#define FBNIC_QUEUE_SIZE_MIN 16u
+#define FBNIC_QUEUE_SIZE_MIN 64u
#define FBNIC_QUEUE_SIZE_MAX SZ_64K
#define FBNIC_TXQ_SIZE_DEFAULT 1024
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index a3845edf0e48..f0b5dd752f08 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -3053,6 +3053,11 @@ static void lan743x_phylink_mac_link_up(struct phylink_config *config,
else if (speed == SPEED_100)
mac_cr |= MAC_CR_CFG_L_;
+ if (duplex == DUPLEX_FULL)
+ mac_cr |= MAC_CR_DPX_;
+ else
+ mac_cr &= ~MAC_CR_DPX_;
+
lan743x_csr_write(adapter, MAC_CR, mac_cr);
lan743x_ptp_update_latency(adapter, speed);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 9017e806ecda..09a53c977545 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -766,6 +766,13 @@ static void mana_get_rxbuf_cfg(struct mana_port_context *apc,
}
*frag_count = 1;
+
+ /* In the single-buffer path, napi_build_skb() must see the
+ * actual backing allocation size so skb->truesize reflects
+ * the full page (or higher-order page), not just the usable
+ * packet area.
+ */
+ *alloc_size = PAGE_SIZE << get_order(*alloc_size);
return;
}
@@ -3425,6 +3432,7 @@ static int add_adev(struct gdma_dev *gd, const char *name)
struct auxiliary_device *adev;
struct mana_adev *madev;
int ret;
+ int id;
madev = kzalloc_obj(*madev);
if (!madev)
@@ -3434,7 +3442,8 @@ static int add_adev(struct gdma_dev *gd, const char *name)
ret = mana_adev_idx_alloc();
if (ret < 0)
goto idx_fail;
- adev->id = ret;
+ id = ret;
+ adev->id = id;
adev->name = name;
adev->dev.parent = gd->gdma_context->dev;
@@ -3460,7 +3469,7 @@ add_fail:
auxiliary_device_uninit(adev);
init_fail:
- mana_adev_idx_free(adev->id);
+ mana_adev_idx_free(id);
idx_fail:
kfree(madev);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 8d040e611d5a..637e635bbf03 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1719,13 +1719,18 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
if (ether_addr_equal(netdev->dev_addr, mac))
return 0;
- err = ionic_program_mac(lif, mac);
- if (err < 0)
- return err;
+ /* Only program macs for virtual functions to avoid losing the permanent
+ * Mac across warm reset/reboot.
+ */
+ if (lif->ionic->pdev->is_virtfn) {
+ err = ionic_program_mac(lif, mac);
+ if (err < 0)
+ return err;
- if (err > 0)
- netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n",
- __func__);
+ if (err > 0)
+ netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n",
+ __func__);
+ }
err = eth_prepare_mac_addr_change(netdev, addr);
if (err)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 6827c99bde8c..13d3cac056be 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -156,7 +156,7 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue);
static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue);
static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
u32 rxmode, u32 chan);
-static int stmmac_vlan_restore(struct stmmac_priv *priv);
+static void stmmac_vlan_restore(struct stmmac_priv *priv);
#ifdef CONFIG_DEBUG_FS
static const struct net_device_ops stmmac_netdev_ops;
@@ -6859,21 +6859,15 @@ del_vlan_error:
return ret;
}
-static int stmmac_vlan_restore(struct stmmac_priv *priv)
+static void stmmac_vlan_restore(struct stmmac_priv *priv)
{
- int ret;
-
if (!(priv->dev->features & NETIF_F_VLAN_FEATURES))
- return 0;
+ return;
if (priv->hw->num_vlan)
stmmac_restore_hw_vlan_rx_fltr(priv, priv->dev, priv->hw);
- ret = stmmac_vlan_update(priv, priv->num_double_vlans);
- if (ret)
- netdev_err(priv->dev, "Failed to restore VLANs\n");
-
- return ret;
+ stmmac_vlan_update(priv, priv->num_double_vlans);
}
static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf)
diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
index 0a3cf2f848a5..a28a608f9bf4 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_common.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
@@ -902,6 +902,7 @@ static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp,
skb_reserve(skb, headroom);
skb_put(skb, pkt_len);
+ skb_copy_to_linear_data(skb, xdp->data, pkt_len);
skb->dev = ndev;
/* RX HW timestamp */
@@ -912,7 +913,6 @@ static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp,
skb->offload_fwd_mark = emac->offload_fwd_mark;
skb->protocol = eth_type_trans(skb, ndev);
- skb_mark_for_recycle(skb);
napi_gro_receive(&emac->napi_rx, skb);
ndev->stats.rx_bytes += pkt_len;
ndev->stats.rx_packets++;
@@ -962,7 +962,6 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id,
pkt_len -= 4;
cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL);
psdata = cppi5_hdesc_get_psdata(desc_rx);
- k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
count++;
xsk_buff_set_size(xdp, pkt_len);
xsk_buff_dma_sync_for_cpu(xdp);
@@ -988,6 +987,7 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id,
emac_dispatch_skb_zc(emac, xdp, psdata);
xsk_buff_free(xdp);
}
+ k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
}
if (xdp_status & ICSSG_XDP_REDIR)
@@ -1057,7 +1057,6 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state)
/* firmware adds 4 CRC bytes, strip them */
pkt_len -= 4;
cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL);
- k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
/* if allocation fails we drop the packet but push the
* descriptor back to the ring with old page to prevent a stall
@@ -1115,6 +1114,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state)
ndev->stats.rx_packets++;
requeue:
+ k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
/* queue another RX DMA */
ret = prueth_dma_rx_push_mapped(emac, &emac->rx_chns, new_page,
PRUETH_MAX_PKT_SIZE);
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index 5ff742103beb..fcd3aaef27fc 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -105,7 +105,7 @@
#define XAXIDMA_BD_HAS_DRE_MASK 0xF00 /* Whether has DRE mask */
#define XAXIDMA_BD_WORDLEN_MASK 0xFF /* Whether has DRE mask */
-#define XAXIDMA_BD_CTRL_LENGTH_MASK 0x007FFFFF /* Requested len */
+#define XAXIDMA_BD_CTRL_LENGTH_MASK GENMASK(25, 0) /* Requested len */
#define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */
#define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */
#define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */
@@ -130,7 +130,7 @@
#define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */
#define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */
-#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK 0x007FFFFF /* Actual len */
+#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK GENMASK(25, 0) /* Actual len */
#define XAXIDMA_BD_STS_COMPLETE_MASK 0x80000000 /* Completed */
#define XAXIDMA_BD_STS_DEC_ERR_MASK 0x40000000 /* Decode error */
#define XAXIDMA_BD_STS_SLV_ERR_MASK 0x20000000 /* Slave error */
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index b06e4c37ff61..263c4b67fd5a 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -770,8 +770,8 @@ static int axienet_device_reset(struct net_device *ndev)
* @first_bd: Index of first descriptor to clean up
* @nr_bds: Max number of descriptors to clean up
* @force: Whether to clean descriptors even if not complete
- * @sizep: Pointer to a u32 filled with the total sum of all bytes
- * in all cleaned-up descriptors. Ignored if NULL.
+ * @sizep: Pointer to a u32 accumulating the total byte count of
+ * completed packets (using skb->len). Ignored if NULL.
* @budget: NAPI budget (use 0 when not called from NAPI poll)
*
* Would either be called after a successful transmit operation, or after
@@ -805,6 +805,8 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
DMA_TO_DEVICE);
if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
+ if (sizep)
+ *sizep += cur_p->skb->len;
napi_consume_skb(cur_p->skb, budget);
packets++;
}
@@ -818,9 +820,6 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
wmb();
cur_p->cntrl = 0;
cur_p->status = 0;
-
- if (sizep)
- *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
}
if (!force) {
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 5db841377199..ce8924613363 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -480,11 +480,16 @@ static void sfp_quirk_ubnt_uf_instant(const struct sfp_eeprom_id *id,
{
/* Ubiquiti U-Fiber Instant module claims that support all transceiver
* types including 10G Ethernet which is not truth. So clear all claimed
- * modes and set only one mode which module supports: 1000baseX_Full.
+ * modes and set only one mode which module supports: 1000baseX_Full,
+ * along with the Autoneg and pause bits.
*/
linkmode_zero(caps->link_modes);
linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
caps->link_modes);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, caps->link_modes);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, caps->link_modes);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, caps->link_modes);
+
phy_interface_zero(caps->interfaces);
__set_bit(PHY_INTERFACE_MODE_1000BASEX, caps->interfaces);
}
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index b7282f5c9632..120aeb539d9f 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -2058,6 +2058,68 @@ static const struct ethtool_ops team_ethtool_ops = {
* rt netlink interface
***********************/
+/* For tx path we need a linkup && enabled port and for parse any port
+ * suffices.
+ */
+static struct team_port *team_header_port_get_rcu(struct team *team,
+ bool txable)
+{
+ struct team_port *port;
+
+ list_for_each_entry_rcu(port, &team->port_list, list) {
+ if (!txable || team_port_txable(port))
+ return port;
+ }
+
+ return NULL;
+}
+
+static int team_header_create(struct sk_buff *skb, struct net_device *team_dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
+{
+ struct team *team = netdev_priv(team_dev);
+ const struct header_ops *port_ops;
+ struct team_port *port;
+ int ret = 0;
+
+ rcu_read_lock();
+ port = team_header_port_get_rcu(team, true);
+ if (port) {
+ port_ops = READ_ONCE(port->dev->header_ops);
+ if (port_ops && port_ops->create)
+ ret = port_ops->create(skb, port->dev,
+ type, daddr, saddr, len);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static int team_header_parse(const struct sk_buff *skb,
+ const struct net_device *team_dev,
+ unsigned char *haddr)
+{
+ struct team *team = netdev_priv(team_dev);
+ const struct header_ops *port_ops;
+ struct team_port *port;
+ int ret = 0;
+
+ rcu_read_lock();
+ port = team_header_port_get_rcu(team, false);
+ if (port) {
+ port_ops = READ_ONCE(port->dev->header_ops);
+ if (port_ops && port_ops->parse)
+ ret = port_ops->parse(skb, port->dev, haddr);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static const struct header_ops team_header_ops = {
+ .create = team_header_create,
+ .parse = team_header_parse,
+};
+
static void team_setup_by_port(struct net_device *dev,
struct net_device *port_dev)
{
@@ -2066,7 +2128,8 @@ static void team_setup_by_port(struct net_device *dev,
if (port_dev->type == ARPHRD_ETHER)
dev->header_ops = team->header_ops_cache;
else
- dev->header_ops = port_dev->header_ops;
+ dev->header_ops = port_dev->header_ops ?
+ &team_header_ops : NULL;
dev->type = port_dev->type;
dev->hard_header_len = port_dev->hard_header_len;
dev->needed_headroom = port_dev->needed_headroom;
diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h
index a5f93b6c4482..fa5cab9d3e55 100644
--- a/drivers/net/tun_vnet.h
+++ b/drivers/net/tun_vnet.h
@@ -244,7 +244,7 @@ tun_vnet_hdr_tnl_from_skb(unsigned int flags,
if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload,
tun_vnet_is_little_endian(flags),
- vlan_hlen, true)) {
+ vlan_hlen, true, false)) {
struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr;
struct skb_shared_info *sinfo = skb_shinfo(skb);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 72d6a9c6a5a2..c0b9bc5574e2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -381,8 +381,6 @@ struct receive_queue {
struct xdp_buff **xsk_buffs;
};
-#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
-
/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
struct virtio_net_ctrl_hdr hdr;
@@ -486,7 +484,7 @@ struct virtnet_info {
/* Must be last as it ends in a flexible-array member. */
TRAILING_OVERLAP(struct virtio_net_rss_config_trailer, rss_trailer, hash_key_data,
- u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
+ u8 rss_hash_key_data[NETDEV_RSS_KEY_LEN];
);
};
static_assert(offsetof(struct virtnet_info, rss_trailer.hash_key_data) ==
@@ -3267,8 +3265,12 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
struct virtio_net_hdr_v1_hash_tunnel *hdr;
int num_sg;
unsigned hdr_len = vi->hdr_len;
+ bool feature_hdrlen;
bool can_push;
+ feature_hdrlen = virtio_has_feature(vi->vdev,
+ VIRTIO_NET_F_GUEST_HDRLEN);
+
pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
/* Make sure it's safe to cast between formats */
@@ -3288,7 +3290,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl,
virtio_is_little_endian(vi->vdev), 0,
- false))
+ false, feature_hdrlen))
return -EPROTO;
if (vi->mergeable_rx_bufs)
@@ -3351,6 +3353,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
/* Don't wait up for transmitted skbs to be freed. */
if (!use_napi) {
skb_orphan(skb);
+ skb_dst_drop(skb);
nf_reset_ct(skb);
}
@@ -6703,6 +6706,7 @@ static int virtnet_probe(struct virtio_device *vdev)
struct virtnet_info *vi;
u16 max_queue_pairs;
int mtu = 0;
+ u16 key_sz;
/* Find if host supports multiqueue/rss virtio_net device */
max_queue_pairs = 1;
@@ -6837,14 +6841,13 @@ static int virtnet_probe(struct virtio_device *vdev)
}
if (vi->has_rss || vi->has_rss_hash_report) {
- vi->rss_key_size =
- virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
- if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
- dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n",
- vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE);
- err = -EINVAL;
- goto free;
- }
+ key_sz = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
+
+ vi->rss_key_size = min_t(u16, key_sz, NETDEV_RSS_KEY_LEN);
+ if (key_sz > vi->rss_key_size)
+ dev_warn(&vdev->dev,
+ "rss_max_key_size=%u exceeds driver limit %u, clamping\n",
+ key_sz, vi->rss_key_size);
vi->rss_hash_types_supported =
virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 17c941aac32d..a94ac82a6136 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1965,12 +1965,14 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,
ns_olen = request->len - skb_network_offset(request) -
sizeof(struct ipv6hdr) - sizeof(*ns);
for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
- if (!ns->opt[i + 1]) {
+ if (!ns->opt[i + 1] || i + (ns->opt[i + 1] << 3) > ns_olen) {
kfree_skb(reply);
return NULL;
}
if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
- daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
+ if ((ns->opt[i + 1] << 3) >=
+ sizeof(struct nd_opt_hdr) + ETH_ALEN)
+ daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
break;
}
}
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 49d959b2e148..85defe11750d 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
*/
#include <linux/ieee80211.h>
@@ -1110,9 +1110,8 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar,
struct ath11k_base *ab = ar->ab;
struct ath11k_peer *peer;
struct ath11k_sta *arsta = ath11k_sta_to_arsta(params->sta);
+ struct dp_rx_tid *rx_tid;
int vdev_id = arsta->arvif->vdev_id;
- dma_addr_t paddr;
- bool active;
int ret;
spin_lock_bh(&ab->base_lock);
@@ -1124,15 +1123,14 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar,
return -ENOENT;
}
- paddr = peer->rx_tid[params->tid].paddr;
- active = peer->rx_tid[params->tid].active;
+ rx_tid = &peer->rx_tid[params->tid];
- if (!active) {
+ if (!rx_tid->active) {
spin_unlock_bh(&ab->base_lock);
return 0;
}
- ret = ath11k_peer_rx_tid_reo_update(ar, peer, peer->rx_tid, 1, 0, false);
+ ret = ath11k_peer_rx_tid_reo_update(ar, peer, rx_tid, 1, 0, false);
spin_unlock_bh(&ab->base_lock);
if (ret) {
ath11k_warn(ab, "failed to update reo for rx tid %d: %d\n",
@@ -1141,7 +1139,8 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar,
}
ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id,
- params->sta->addr, paddr,
+ params->sta->addr,
+ rx_tid->paddr,
params->tid, 1, 1);
if (ret)
ath11k_warn(ab, "failed to send wmi to delete rx tid %d\n",
diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c
index 5a82ede65dd3..244d5230a5bd 100644
--- a/drivers/net/wireless/ath/ath12k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath12k/dp_rx.c
@@ -735,6 +735,7 @@ int ath12k_dp_rx_ampdu_stop(struct ath12k *ar,
struct ath12k_dp *dp = ath12k_ab_to_dp(ab);
struct ath12k_dp_link_peer *peer;
struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(params->sta);
+ struct ath12k_dp_rx_tid *rx_tid;
struct ath12k_link_sta *arsta;
int vdev_id;
bool active;
@@ -770,7 +771,8 @@ int ath12k_dp_rx_ampdu_stop(struct ath12k *ar,
return 0;
}
- ret = ath12k_dp_arch_peer_rx_tid_reo_update(dp, peer, peer->dp_peer->rx_tid,
+ rx_tid = &peer->dp_peer->rx_tid[params->tid];
+ ret = ath12k_dp_arch_peer_rx_tid_reo_update(dp, peer, rx_tid,
1, 0, false);
spin_unlock_bh(&dp->dp_lock);
if (ret) {
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
index 8d64a271bb94..36159a769916 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
@@ -297,6 +297,11 @@ enum iwl_legacy_cmds {
SCAN_OFFLOAD_UPDATE_PROFILES_CMD = 0x6E,
/**
+ * @SCAN_START_NOTIFICATION_UMAC: uses &struct iwl_umac_scan_start
+ */
+ SCAN_START_NOTIFICATION_UMAC = 0xb2,
+
+ /**
* @MATCH_FOUND_NOTIFICATION: scan match found
*/
MATCH_FOUND_NOTIFICATION = 0xd9,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index 60f0a4924ddf..46fcc32608e3 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -1157,6 +1157,16 @@ enum iwl_umac_scan_abort_status {
};
/**
+ * struct iwl_umac_scan_start - scan start notification
+ * @uid: scan id, &enum iwl_umac_scan_uid_offsets
+ * @reserved: for future use
+ */
+struct iwl_umac_scan_start {
+ __le32 uid;
+ __le32 reserved;
+} __packed; /* SCAN_START_UMAC_API_S_VER_1 */
+
+/**
* struct iwl_umac_scan_complete - scan complete notification
* @uid: scan id, &enum iwl_umac_scan_uid_offsets
* @last_schedule: last scheduling line
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c
index 29df747c8938..9215fc7e2eca 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c
@@ -111,14 +111,75 @@ static bool iwl_mld_is_nic_ack_enabled(struct iwl_mld *mld,
IEEE80211_HE_MAC_CAP2_ACK_EN);
}
-static void iwl_mld_set_he_support(struct iwl_mld *mld,
- struct ieee80211_vif *vif,
- struct iwl_mac_config_cmd *cmd)
+struct iwl_mld_mac_wifi_gen_sta_iter_data {
+ struct ieee80211_vif *vif;
+ struct iwl_mac_wifi_gen_support *support;
+};
+
+static void iwl_mld_mac_wifi_gen_sta_iter(void *_data,
+ struct ieee80211_sta *sta)
{
- if (vif->type == NL80211_IFTYPE_AP)
- cmd->wifi_gen.he_ap_support = 1;
- else
- cmd->wifi_gen.he_support = 1;
+ struct iwl_mld_sta *mld_sta = iwl_mld_sta_from_mac80211(sta);
+ struct iwl_mld_mac_wifi_gen_sta_iter_data *data = _data;
+ struct ieee80211_link_sta *link_sta;
+ unsigned int link_id;
+
+ if (mld_sta->vif != data->vif)
+ return;
+
+ for_each_sta_active_link(data->vif, sta, link_sta, link_id) {
+ if (link_sta->he_cap.has_he)
+ data->support->he_support = 1;
+ if (link_sta->eht_cap.has_eht)
+ data->support->eht_support = 1;
+ }
+}
+
+static void iwl_mld_set_wifi_gen(struct iwl_mld *mld,
+ struct ieee80211_vif *vif,
+ struct iwl_mac_wifi_gen_support *support)
+{
+ struct iwl_mld_mac_wifi_gen_sta_iter_data sta_iter_data = {
+ .vif = vif,
+ .support = support,
+ };
+ struct ieee80211_bss_conf *link_conf;
+ unsigned int link_id;
+
+ switch (vif->type) {
+ case NL80211_IFTYPE_MONITOR:
+ /* for sniffer, set to HW capabilities */
+ support->he_support = 1;
+ support->eht_support = mld->trans->cfg->eht_supported;
+ break;
+ case NL80211_IFTYPE_AP:
+ /* for AP set according to the link configs */
+ for_each_vif_active_link(vif, link_conf, link_id) {
+ support->he_ap_support |= link_conf->he_support;
+ support->eht_support |= link_conf->eht_support;
+ }
+ break;
+ default:
+ /*
+ * If we have MLO enabled, then the firmware needs to enable
+ * address translation for the station(s) we add. That depends
+ * on having EHT enabled in firmware, which in turn depends on
+ * mac80211 in the iteration below.
+ * However, mac80211 doesn't enable capabilities on the AP STA
+ * until it has parsed the association response successfully,
+ * so set EHT (and HE as a pre-requisite for EHT) when the vif
+ * is an MLD.
+ */
+ if (ieee80211_vif_is_mld(vif)) {
+ support->he_support = 1;
+ support->eht_support = 1;
+ }
+
+ ieee80211_iterate_stations_mtx(mld->hw,
+ iwl_mld_mac_wifi_gen_sta_iter,
+ &sta_iter_data);
+ break;
+ }
}
/* fill the common part for all interface types */
@@ -128,8 +189,6 @@ static void iwl_mld_mac_cmd_fill_common(struct iwl_mld *mld,
u32 action)
{
struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(vif);
- struct ieee80211_bss_conf *link_conf;
- unsigned int link_id;
lockdep_assert_wiphy(mld->wiphy);
@@ -147,29 +206,7 @@ static void iwl_mld_mac_cmd_fill_common(struct iwl_mld *mld,
cmd->nic_not_ack_enabled =
cpu_to_le32(!iwl_mld_is_nic_ack_enabled(mld, vif));
- /* If we have MLO enabled, then the firmware needs to enable
- * address translation for the station(s) we add. That depends
- * on having EHT enabled in firmware, which in turn depends on
- * mac80211 in the code below.
- * However, mac80211 doesn't enable HE/EHT until it has parsed
- * the association response successfully, so just skip all that
- * and enable both when we have MLO.
- */
- if (ieee80211_vif_is_mld(vif)) {
- iwl_mld_set_he_support(mld, vif, cmd);
- cmd->wifi_gen.eht_support = 1;
- return;
- }
-
- for_each_vif_active_link(vif, link_conf, link_id) {
- if (!link_conf->he_support)
- continue;
-
- iwl_mld_set_he_support(mld, vif, cmd);
-
- /* EHT, if supported, was already set above */
- break;
- }
+ iwl_mld_set_wifi_gen(mld, vif, &cmd->wifi_gen);
}
static void iwl_mld_fill_mac_cmd_sta(struct iwl_mld *mld,
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
index 0c53d6bd9651..71a9a72c9ac0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
@@ -1761,6 +1761,16 @@ static int iwl_mld_move_sta_state_up(struct iwl_mld *mld,
if (vif->type == NL80211_IFTYPE_STATION)
iwl_mld_link_set_2mhz_block(mld, vif, sta);
+
+ if (sta->tdls) {
+ /*
+ * update MAC since wifi generation flags may change,
+ * we also update MAC on association to the AP via the
+ * vif assoc change
+ */
+ iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_MODIFY);
+ }
+
/* Now the link_sta's capabilities are set, update the FW */
iwl_mld_config_tlc(mld, vif, sta);
@@ -1873,6 +1883,15 @@ static int iwl_mld_move_sta_state_down(struct iwl_mld *mld,
/* just removed last TDLS STA, so enable PM */
iwl_mld_update_mac_power(mld, vif, false);
}
+
+ if (sta->tdls) {
+ /*
+ * update MAC since wifi generation flags may change,
+ * we also update MAC on disassociation to the AP via
+ * the vif assoc change
+ */
+ iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_MODIFY);
+ }
} else {
return -EINVAL;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c
index 495e9d8f3af6..9af79297c3b6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c
@@ -171,6 +171,7 @@ static const struct iwl_hcmd_names iwl_mld_legacy_names[] = {
HCMD_NAME(MISSED_BEACONS_NOTIFICATION),
HCMD_NAME(MAC_PM_POWER_TABLE),
HCMD_NAME(MFUART_LOAD_NOTIFICATION),
+ HCMD_NAME(SCAN_START_NOTIFICATION_UMAC),
HCMD_NAME(RSS_CONFIG_CMD),
HCMD_NAME(SCAN_ITERATION_COMPLETE_UMAC),
HCMD_NAME(REPLY_RX_MPDU_CMD),
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c
index f842f5183223..fbff5915f7fd 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c
@@ -739,7 +739,7 @@ iwl_mld_set_link_sel_data(struct iwl_mld *mld,
/* Ignore any BSS that was not seen in the last MLO scan */
if (ktime_before(link_conf->bss->ts_boottime,
- mld->scan.last_mlo_scan_time))
+ mld->scan.last_mlo_scan_start_time))
continue;
data[n_data].link_id = link_id;
@@ -945,7 +945,7 @@ static void _iwl_mld_select_links(struct iwl_mld *mld,
if (!mld_vif->authorized || hweight16(usable_links) <= 1)
return;
- if (WARN(ktime_before(mld->scan.last_mlo_scan_time,
+ if (WARN(ktime_before(mld->scan.last_mlo_scan_start_time,
ktime_sub_ns(ktime_get_boottime_ns(),
5ULL * NSEC_PER_SEC)),
"Last MLO scan was too long ago, can't select links\n"))
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/notif.c b/drivers/net/wireless/intel/iwlwifi/mld/notif.c
index 240526d8b632..9c88a8579a75 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/notif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/notif.c
@@ -287,6 +287,8 @@ static void iwl_mld_handle_beacon_notification(struct iwl_mld *mld,
* at least enough bytes to cover the structure listed in the CMD_VER_ENTRY.
*/
+CMD_VERSIONS(scan_start_notif,
+ CMD_VER_ENTRY(1, iwl_umac_scan_start))
CMD_VERSIONS(scan_complete_notif,
CMD_VER_ENTRY(1, iwl_umac_scan_complete))
CMD_VERSIONS(scan_iter_complete_notif,
@@ -360,6 +362,7 @@ DEFINE_SIMPLE_CANCELLATION(datapath_monitor, iwl_datapath_monitor_notif,
link_id)
DEFINE_SIMPLE_CANCELLATION(roc, iwl_roc_notif, activity)
DEFINE_SIMPLE_CANCELLATION(scan_complete, iwl_umac_scan_complete, uid)
+DEFINE_SIMPLE_CANCELLATION(scan_start, iwl_umac_scan_start, uid)
DEFINE_SIMPLE_CANCELLATION(probe_resp_data, iwl_probe_resp_data_notif,
mac_id)
DEFINE_SIMPLE_CANCELLATION(uapsd_misbehaving_ap, iwl_uapsd_misbehaving_ap_notif,
@@ -402,6 +405,8 @@ const struct iwl_rx_handler iwl_mld_rx_handlers[] = {
RX_HANDLER_SYNC)
RX_HANDLER_NO_OBJECT(LEGACY_GROUP, BA_NOTIF, compressed_ba_notif,
RX_HANDLER_SYNC)
+ RX_HANDLER_OF_SCAN(LEGACY_GROUP, SCAN_START_NOTIFICATION_UMAC,
+ scan_start_notif)
RX_HANDLER_OF_SCAN(LEGACY_GROUP, SCAN_COMPLETE_UMAC,
scan_complete_notif)
RX_HANDLER_NO_OBJECT(LEGACY_GROUP, SCAN_ITERATION_COMPLETE_UMAC,
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.c b/drivers/net/wireless/intel/iwlwifi/mld/scan.c
index a1a4cf3ab3d3..abd4281b4b0e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.c
@@ -473,6 +473,9 @@ iwl_mld_scan_get_cmd_gen_flags(struct iwl_mld *mld,
params->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ)
flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_TRIGGER_UHB_SCAN;
+ if (scan_status == IWL_MLD_SCAN_INT_MLO)
+ flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_NTF_START;
+
if (params->enable_6ghz_passive)
flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN;
@@ -1817,9 +1820,6 @@ static void iwl_mld_int_mlo_scan_start(struct iwl_mld *mld,
ret = _iwl_mld_single_scan_start(mld, vif, req, &ies,
IWL_MLD_SCAN_INT_MLO);
- if (!ret)
- mld->scan.last_mlo_scan_time = ktime_get_boottime_ns();
-
IWL_DEBUG_SCAN(mld, "Internal MLO scan: ret=%d\n", ret);
}
@@ -1904,6 +1904,30 @@ void iwl_mld_handle_match_found_notif(struct iwl_mld *mld,
ieee80211_sched_scan_results(mld->hw);
}
+void iwl_mld_handle_scan_start_notif(struct iwl_mld *mld,
+ struct iwl_rx_packet *pkt)
+{
+ struct iwl_umac_scan_complete *notif = (void *)pkt->data;
+ u32 uid = le32_to_cpu(notif->uid);
+
+ if (IWL_FW_CHECK(mld, uid >= ARRAY_SIZE(mld->scan.uid_status),
+ "FW reports out-of-range scan UID %d\n", uid))
+ return;
+
+ if (IWL_FW_CHECK(mld, !(mld->scan.uid_status[uid] & mld->scan.status),
+ "FW reports scan UID %d we didn't trigger\n", uid))
+ return;
+
+ IWL_DEBUG_SCAN(mld, "Scan started: uid=%u type=%u\n", uid,
+ mld->scan.uid_status[uid]);
+ if (IWL_FW_CHECK(mld, mld->scan.uid_status[uid] != IWL_MLD_SCAN_INT_MLO,
+ "FW reports scan start notification %d we didn't trigger\n",
+ mld->scan.uid_status[uid]))
+ return;
+
+ mld->scan.last_mlo_scan_start_time = ktime_get_boottime_ns();
+}
+
void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld,
struct iwl_rx_packet *pkt)
{
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.h b/drivers/net/wireless/intel/iwlwifi/mld/scan.h
index 69110f0cfc8e..de5620e7f463 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.h
@@ -27,6 +27,9 @@ int iwl_mld_sched_scan_start(struct iwl_mld *mld,
void iwl_mld_handle_match_found_notif(struct iwl_mld *mld,
struct iwl_rx_packet *pkt);
+void iwl_mld_handle_scan_start_notif(struct iwl_mld *mld,
+ struct iwl_rx_packet *pkt);
+
void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld,
struct iwl_rx_packet *pkt);
@@ -114,8 +117,8 @@ enum iwl_mld_traffic_load {
* in jiffies.
* @last_start_time_jiffies: stores the last start time in jiffies
* (interface up/reset/resume).
- * @last_mlo_scan_time: start time of the last MLO scan in nanoseconds since
- * boot.
+ * @last_mlo_scan_start_time: start time of the last MLO scan in nanoseconds
+ * since boot.
*/
struct iwl_mld_scan {
/* Add here fields that need clean up on restart */
@@ -136,7 +139,7 @@ struct iwl_mld_scan {
void *cmd;
unsigned long last_6ghz_passive_jiffies;
unsigned long last_start_time_jiffies;
- u64 last_mlo_scan_time;
+ u64 last_mlo_scan_start_time;
};
/**
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index a19f9d2e9346..9a74f60c9185 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -2807,7 +2807,7 @@ static void iwl_mvm_nd_match_info_handler(struct iwl_mvm *mvm,
if (IS_ERR_OR_NULL(vif))
return;
- if (len < sizeof(struct iwl_scan_offload_match_info)) {
+ if (len < sizeof(struct iwl_scan_offload_match_info) + matches_len) {
IWL_ERR(mvm, "Invalid scan match info notification\n");
return;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 43cf94c9a36b..6cc78661116e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -470,7 +470,8 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm)
.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
};
- if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) {
+ if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210 ||
+ !mvm->trans->cfg->uhb_supported) {
IWL_DEBUG_RADIO(mvm, "UATS feature is not supported\n");
return;
}
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
index f354b11cb919..944b2a812b63 100644
--- a/drivers/net/wireless/microchip/wilc1000/hif.c
+++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -163,7 +163,7 @@ int wilc_scan(struct wilc_vif *vif, u8 scan_source,
u32 index = 0;
u32 i, scan_timeout;
u8 *buffer;
- u8 valuesize = 0;
+ u32 valuesize = 0;
u8 *search_ssid_vals = NULL;
const u8 ch_list_len = request->n_channels;
struct host_if_drv *hif_drv = vif->hif_drv;
diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c
index 2da8c0d5105b..4489aa77bb0f 100644
--- a/drivers/net/wireless/ti/wl1251/tx.c
+++ b/drivers/net/wireless/ti/wl1251/tx.c
@@ -402,12 +402,14 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl,
int hdrlen;
u8 *frame;
- skb = wl->tx_frames[result->id];
- if (skb == NULL) {
- wl1251_error("SKB for packet %d is NULL", result->id);
+ if (unlikely(result->id >= ARRAY_SIZE(wl->tx_frames) ||
+ wl->tx_frames[result->id] == NULL)) {
+ wl1251_error("invalid packet id %u", result->id);
return;
}
+ skb = wl->tx_frames[result->id];
+
info = IEEE80211_SKB_CB(skb);
if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) &&
diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c
index 885dc7243e8d..97bd39d89e98 100644
--- a/drivers/net/wireless/virtual/virt_wifi.c
+++ b/drivers/net/wireless/virtual/virt_wifi.c
@@ -557,7 +557,6 @@ static int virt_wifi_newlink(struct net_device *dev,
eth_hw_addr_inherit(dev, priv->lowerdev);
netif_stacked_transfer_operstate(priv->lowerdev, dev);
- SET_NETDEV_DEV(dev, &priv->lowerdev->dev);
dev->ieee80211_ptr = kzalloc_obj(*dev->ieee80211_ptr);
if (!dev->ieee80211_ptr) {
diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c
index 6d2f520a5bc8..1b82b7b2a5fa 100644
--- a/drivers/nfc/pn533/uart.c
+++ b/drivers/nfc/pn533/uart.c
@@ -211,6 +211,9 @@ static size_t pn532_receive_buf(struct serdev_device *serdev,
timer_delete(&dev->cmd_timeout);
for (i = 0; i < count; i++) {
+ if (unlikely(!skb_tailroom(dev->recv_skb)))
+ skb_trim(dev->recv_skb, 0);
+
skb_put_u8(dev->recv_skb, *data++);
if (!pn532_uart_rx_is_frame(dev->recv_skb))
continue;
diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c
index 7754baed67f2..97cff5b8ca88 100644
--- a/drivers/pci/pwrctrl/core.c
+++ b/drivers/pci/pwrctrl/core.c
@@ -299,8 +299,10 @@ static bool pci_pwrctrl_is_required(struct device_node *np)
struct device_node *remote __free(device_node) =
of_graph_get_remote_port_parent(endpoint);
if (remote) {
- if (of_pci_supply_present(remote))
+ if (of_pci_supply_present(remote)) {
+ of_node_put(endpoint);
return true;
+ }
}
}
}
diff --git a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c
index 0d0377283c37..c7e4beec160a 100644
--- a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c
+++ b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c
@@ -68,13 +68,6 @@ static int pwrseq_pwrctrl_power_off(struct pci_pwrctrl *pwrctrl)
return pwrseq_power_off(pwrseq->pwrseq);
}
-static void devm_pwrseq_pwrctrl_power_off(void *data)
-{
- struct pwrseq_pwrctrl *pwrseq = data;
-
- pwrseq_pwrctrl_power_off(&pwrseq->pwrctrl);
-}
-
static int pwrseq_pwrctrl_probe(struct platform_device *pdev)
{
const struct pwrseq_pwrctrl_pdata *pdata;
@@ -101,11 +94,6 @@ static int pwrseq_pwrctrl_probe(struct platform_device *pdev)
return dev_err_probe(dev, PTR_ERR(pwrseq->pwrseq),
"Failed to get the power sequencer\n");
- ret = devm_add_action_or_reset(dev, devm_pwrseq_pwrctrl_power_off,
- pwrseq);
- if (ret)
- return ret;
-
pwrseq->pwrctrl.power_on = pwrseq_pwrctrl_power_on;
pwrseq->pwrctrl.power_off = pwrseq_pwrctrl_power_off;
diff --git a/drivers/pci/pwrctrl/slot.c b/drivers/pci/pwrctrl/slot.c
index 082af81efe25..b87639253ae2 100644
--- a/drivers/pci/pwrctrl/slot.c
+++ b/drivers/pci/pwrctrl/slot.c
@@ -63,7 +63,6 @@ static void devm_slot_pwrctrl_release(void *data)
{
struct slot_pwrctrl *slot = data;
- slot_pwrctrl_power_off(&slot->pwrctrl);
regulator_bulk_free(slot->num_supplies, slot->supplies);
}
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 02467dfd4fb0..1875d5b784f6 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -6,7 +6,7 @@
menu "PHY Subsystem"
config PHY_COMMON_PROPS
- bool
+ bool "PHY common properties" if KUNIT_ALL_TESTS
help
This parses properties common between generic PHYs and Ethernet PHYs.
@@ -16,8 +16,7 @@ config PHY_COMMON_PROPS
config PHY_COMMON_PROPS_TEST
tristate "KUnit tests for PHY common props" if !KUNIT_ALL_TESTS
- select PHY_COMMON_PROPS
- depends on KUNIT
+ depends on KUNIT && PHY_COMMON_PROPS
default KUNIT_ALL_TESTS
help
This builds KUnit tests for the PHY common property API.
diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c
index 2b0fd95ba62f..63427fc34e26 100644
--- a/drivers/phy/freescale/phy-fsl-lynx-28g.c
+++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c
@@ -1069,6 +1069,8 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work)
for (i = 0; i < LYNX_28G_NUM_LANE; i++) {
lane = &priv->lane[i];
+ if (!lane->phy)
+ continue;
mutex_lock(&lane->phy->mutex);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index df138a5442eb..771bc7c2ab50 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -990,6 +990,7 @@ static const struct qmp_phy_init_tbl sm8650_ufsphy_pcs[] = {
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PCS_CTRL1, 0xc1),
+ QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0f),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_SIGDET_CTRL2, 0x68),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S4, 0x0e),
@@ -999,13 +1000,11 @@ static const struct qmp_phy_init_tbl sm8650_ufsphy_pcs[] = {
};
static const struct qmp_phy_init_tbl sm8650_ufsphy_g4_pcs[] = {
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x13),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x04),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x04),
};
static const struct qmp_phy_init_tbl sm8650_ufsphy_g5_pcs[] = {
- QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x05),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x05),
QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HS_G5_SYNC_LENGTH_CAPABILITY, 0x4d),
diff --git a/drivers/phy/spacemit/phy-k1-usb2.c b/drivers/phy/spacemit/phy-k1-usb2.c
index 342061380012..9215d0b223b2 100644
--- a/drivers/phy/spacemit/phy-k1-usb2.c
+++ b/drivers/phy/spacemit/phy-k1-usb2.c
@@ -48,6 +48,9 @@
#define PHY_CLK_HSTXP_EN BIT(3) /* clock hstxp enable */
#define PHY_HSTXP_MODE BIT(4) /* 0: force en_txp to be 1; 1: no force */
+#define PHY_K1_HS_HOST_DISC 0x40
+#define PHY_K1_HS_HOST_DISC_CLR BIT(0)
+
#define PHY_PLL_DIV_CFG 0x98
#define PHY_FDIV_FRACT_8_15 GENMASK(7, 0)
#define PHY_FDIV_FRACT_16_19 GENMASK(11, 8)
@@ -142,9 +145,20 @@ static int spacemit_usb2phy_exit(struct phy *phy)
return 0;
}
+static int spacemit_usb2phy_disconnect(struct phy *phy, int port)
+{
+ struct spacemit_usb2phy *sphy = phy_get_drvdata(phy);
+
+ regmap_update_bits(sphy->regmap_base, PHY_K1_HS_HOST_DISC,
+ PHY_K1_HS_HOST_DISC_CLR, PHY_K1_HS_HOST_DISC_CLR);
+
+ return 0;
+}
+
static const struct phy_ops spacemit_usb2phy_ops = {
.init = spacemit_usb2phy_init,
.exit = spacemit_usb2phy_exit,
+ .disconnect = spacemit_usb2phy_disconnect,
.owner = THIS_MODULE,
};
diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c
index 6e9ecb88dc8b..6b584706b913 100644
--- a/drivers/phy/ti/phy-j721e-wiz.c
+++ b/drivers/phy/ti/phy-j721e-wiz.c
@@ -1425,6 +1425,7 @@ static int wiz_get_lane_phy_types(struct device *dev, struct wiz *wiz)
dev_err(dev,
"%s: Reading \"reg\" from \"%s\" failed: %d\n",
__func__, subnode->name, ret);
+ of_node_put(serdes);
return ret;
}
of_property_read_u32(subnode, "cdns,num-lanes", &num_lanes);
@@ -1439,6 +1440,7 @@ static int wiz_get_lane_phy_types(struct device *dev, struct wiz *wiz)
}
}
+ of_node_put(serdes);
return 0;
}
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index d6a46fe0cda8..3f518dce6d23 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -1135,9 +1135,12 @@ int mtk_pctrl_init(struct platform_device *pdev,
goto chip_error;
}
- ret = mtk_eint_init(pctl, pdev);
- if (ret)
- goto chip_error;
+ /* Only initialize EINT if we have EINT pins */
+ if (data->eint_hw.ap_num > 0) {
+ ret = mtk_eint_init(pctl, pdev);
+ if (ret)
+ goto chip_error;
+ }
return 0;
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index 83f940fe30b2..d02d42513ebb 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -723,6 +723,21 @@ static const struct pinconf_ops pmic_gpio_pinconf_ops = {
.pin_config_group_dbg_show = pmic_gpio_config_dbg_show,
};
+static int pmic_gpio_get_direction(struct gpio_chip *chip, unsigned pin)
+{
+ struct pmic_gpio_state *state = gpiochip_get_data(chip);
+ struct pmic_gpio_pad *pad;
+
+ pad = state->ctrl->desc->pins[pin].drv_data;
+
+ if (!pad->is_enabled || pad->analog_pass ||
+ (!pad->input_enabled && !pad->output_enabled))
+ return -EINVAL;
+
+ /* Make sure the state is aligned on what pmic_gpio_get() returns */
+ return pad->input_enabled ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT;
+}
+
static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned pin)
{
struct pmic_gpio_state *state = gpiochip_get_data(chip);
@@ -801,6 +816,7 @@ static void pmic_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
}
static const struct gpio_chip pmic_gpio_gpio_template = {
+ .get_direction = pmic_gpio_get_direction,
.direction_input = pmic_gpio_direction_input,
.direction_output = pmic_gpio_direction_output,
.get = pmic_gpio_get,
diff --git a/drivers/pinctrl/renesas/pinctrl-rza1.c b/drivers/pinctrl/renesas/pinctrl-rza1.c
index bc8b9b9ad05b..d2949e4dbaf7 100644
--- a/drivers/pinctrl/renesas/pinctrl-rza1.c
+++ b/drivers/pinctrl/renesas/pinctrl-rza1.c
@@ -589,7 +589,7 @@ static inline unsigned int rza1_get_bit(struct rza1_port *port,
{
void __iomem *mem = RZA1_ADDR(port->base, reg, port->id);
- return ioread16(mem) & BIT(bit);
+ return !!(ioread16(mem) & BIT(bit));
}
/**
diff --git a/drivers/pinctrl/renesas/pinctrl-rzt2h.c b/drivers/pinctrl/renesas/pinctrl-rzt2h.c
index 9949108a35bb..5927744c7a96 100644
--- a/drivers/pinctrl/renesas/pinctrl-rzt2h.c
+++ b/drivers/pinctrl/renesas/pinctrl-rzt2h.c
@@ -85,7 +85,7 @@ struct rzt2h_pinctrl {
struct gpio_chip gpio_chip;
struct pinctrl_gpio_range gpio_range;
DECLARE_BITMAP(used_irqs, RZT2H_INTERRUPTS_NUM);
- spinlock_t lock; /* lock read/write registers */
+ raw_spinlock_t lock; /* lock read/write registers */
struct mutex mutex; /* serialize adding groups and functions */
bool safety_port_enabled;
atomic_t wakeup_path;
@@ -145,7 +145,7 @@ static void rzt2h_pinctrl_set_pfc_mode(struct rzt2h_pinctrl *pctrl,
u64 reg64;
u16 reg16;
- guard(spinlock_irqsave)(&pctrl->lock);
+ guard(raw_spinlock_irqsave)(&pctrl->lock);
/* Set pin to 'Non-use (Hi-Z input protection)' */
reg16 = rzt2h_pinctrl_readw(pctrl, port, PM(port));
@@ -474,7 +474,7 @@ static int rzt2h_gpio_request(struct gpio_chip *chip, unsigned int offset)
if (ret)
return ret;
- guard(spinlock_irqsave)(&pctrl->lock);
+ guard(raw_spinlock_irqsave)(&pctrl->lock);
/* Select GPIO mode in PMC Register */
rzt2h_pinctrl_set_gpio_en(pctrl, port, bit, true);
@@ -487,7 +487,7 @@ static void rzt2h_gpio_set_direction(struct rzt2h_pinctrl *pctrl, u32 port,
{
u16 reg;
- guard(spinlock_irqsave)(&pctrl->lock);
+ guard(raw_spinlock_irqsave)(&pctrl->lock);
reg = rzt2h_pinctrl_readw(pctrl, port, PM(port));
reg &= ~PM_PIN_MASK(bit);
@@ -509,7 +509,7 @@ static int rzt2h_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
if (ret)
return ret;
- guard(spinlock_irqsave)(&pctrl->lock);
+ guard(raw_spinlock_irqsave)(&pctrl->lock);
if (rzt2h_pinctrl_readb(pctrl, port, PMC(port)) & BIT(bit)) {
/*
@@ -547,7 +547,7 @@ static int rzt2h_gpio_set(struct gpio_chip *chip, unsigned int offset,
u8 bit = RZT2H_PIN_ID_TO_PIN(offset);
u8 reg;
- guard(spinlock_irqsave)(&pctrl->lock);
+ guard(raw_spinlock_irqsave)(&pctrl->lock);
reg = rzt2h_pinctrl_readb(pctrl, port, P(port));
if (value)
@@ -833,6 +833,7 @@ static int rzt2h_gpio_register(struct rzt2h_pinctrl *pctrl)
if (ret)
return dev_err_probe(dev, ret, "Unable to parse gpio-ranges\n");
+ of_node_put(of_args.np);
if (of_args.args[0] != 0 || of_args.args[1] != 0 ||
of_args.args[2] != pctrl->data->n_port_pins)
return dev_err_probe(dev, -EINVAL,
@@ -964,7 +965,7 @@ static int rzt2h_pinctrl_probe(struct platform_device *pdev)
if (ret)
return ret;
- spin_lock_init(&pctrl->lock);
+ raw_spin_lock_init(&pctrl->lock);
mutex_init(&pctrl->mutex);
platform_set_drvdata(pdev, pctrl);
diff --git a/drivers/pinctrl/stm32/Kconfig b/drivers/pinctrl/stm32/Kconfig
index 5f67e1ee66dd..d6a171523012 100644
--- a/drivers/pinctrl/stm32/Kconfig
+++ b/drivers/pinctrl/stm32/Kconfig
@@ -65,6 +65,7 @@ config PINCTRL_STM32_HDP
select PINMUX
select GENERIC_PINCONF
select GPIOLIB
+ select GPIO_GENERIC
help
The Hardware Debug Port allows the observation of internal signals.
It uses configurable multiplexer to route signals in a dedicated observation register.
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index c990b6118172..d3042e0c9712 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -157,6 +157,7 @@ sunxi_pinctrl_desc_find_function_by_name(struct sunxi_pinctrl *pctl,
const char *pin_name,
const char *func_name)
{
+ unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK;
int i;
for (i = 0; i < pctl->desc->npins; i++) {
@@ -168,7 +169,7 @@ sunxi_pinctrl_desc_find_function_by_name(struct sunxi_pinctrl *pctl,
while (func->name) {
if (!strcmp(func->name, func_name) &&
(!func->variant ||
- func->variant & pctl->variant))
+ func->variant & variant))
return func;
func++;
@@ -209,6 +210,8 @@ sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl,
const u16 pin_num,
const u8 muxval)
{
+ unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK;
+
for (unsigned int i = 0; i < pctl->desc->npins; i++) {
const struct sunxi_desc_pin *pin = pctl->desc->pins + i;
struct sunxi_desc_function *func = pin->functions;
@@ -216,7 +219,7 @@ sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl,
if (pin->pin.number != pin_num)
continue;
- if (pin->variant && !(pctl->variant & pin->variant))
+ if (pin->variant && !(variant & pin->variant))
continue;
while (func->name) {
@@ -1089,6 +1092,9 @@ static int sunxi_pinctrl_irq_request_resources(struct irq_data *d)
{
struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
struct sunxi_desc_function *func;
+ unsigned int offset;
+ u32 reg, shift, mask;
+ u8 disabled_mux, muxval;
int ret;
func = sunxi_pinctrl_desc_find_function_by_pin(pctl,
@@ -1096,8 +1102,21 @@ static int sunxi_pinctrl_irq_request_resources(struct irq_data *d)
if (!func)
return -EINVAL;
- ret = gpiochip_lock_as_irq(pctl->chip,
- pctl->irq_array[d->hwirq] - pctl->desc->pin_base);
+ offset = pctl->irq_array[d->hwirq] - pctl->desc->pin_base;
+ sunxi_mux_reg(pctl, offset, &reg, &shift, &mask);
+ muxval = (readl(pctl->membase + reg) & mask) >> shift;
+
+ /* Change muxing to GPIO INPUT mode if at reset value */
+ if (pctl->flags & SUNXI_PINCTRL_NEW_REG_LAYOUT)
+ disabled_mux = SUN4I_FUNC_DISABLED_NEW;
+ else
+ disabled_mux = SUN4I_FUNC_DISABLED_OLD;
+
+ if (muxval == disabled_mux)
+ sunxi_pmx_set(pctl->pctl_dev, pctl->irq_array[d->hwirq],
+ SUN4I_FUNC_INPUT);
+
+ ret = gpiochip_lock_as_irq(pctl->chip, offset);
if (ret) {
dev_err(pctl->dev, "unable to lock HW IRQ %lu for IRQ\n",
irqd_to_hwirq(d));
@@ -1338,6 +1357,7 @@ static int sunxi_pinctrl_add_function(struct sunxi_pinctrl *pctl,
static int sunxi_pinctrl_build_state(struct platform_device *pdev)
{
struct sunxi_pinctrl *pctl = platform_get_drvdata(pdev);
+ unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK;
void *ptr;
int i;
@@ -1362,7 +1382,7 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev)
const struct sunxi_desc_pin *pin = pctl->desc->pins + i;
struct sunxi_pinctrl_group *group = pctl->groups + pctl->ngroups;
- if (pin->variant && !(pctl->variant & pin->variant))
+ if (pin->variant && !(variant & pin->variant))
continue;
group->name = pin->pin.name;
@@ -1387,11 +1407,11 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev)
const struct sunxi_desc_pin *pin = pctl->desc->pins + i;
struct sunxi_desc_function *func;
- if (pin->variant && !(pctl->variant & pin->variant))
+ if (pin->variant && !(variant & pin->variant))
continue;
for (func = pin->functions; func->name; func++) {
- if (func->variant && !(pctl->variant & func->variant))
+ if (func->variant && !(variant & func->variant))
continue;
/* Create interrupt mapping while we're at it */
@@ -1419,14 +1439,14 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev)
const struct sunxi_desc_pin *pin = pctl->desc->pins + i;
struct sunxi_desc_function *func;
- if (pin->variant && !(pctl->variant & pin->variant))
+ if (pin->variant && !(variant & pin->variant))
continue;
for (func = pin->functions; func->name; func++) {
struct sunxi_pinctrl_function *func_item;
const char **func_grp;
- if (func->variant && !(pctl->variant & func->variant))
+ if (func->variant && !(variant & func->variant))
continue;
func_item = sunxi_pinctrl_find_function_by_name(pctl,
@@ -1568,7 +1588,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev,
pctl->dev = &pdev->dev;
pctl->desc = desc;
- pctl->variant = flags & SUNXI_PINCTRL_VARIANT_MASK;
+ pctl->flags = flags;
if (flags & SUNXI_PINCTRL_NEW_REG_LAYOUT) {
pctl->bank_mem_size = D1_BANK_MEM_SIZE;
pctl->pull_regs_offset = D1_PULL_REGS_OFFSET;
@@ -1604,8 +1624,9 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev,
for (i = 0, pin_idx = 0; i < pctl->desc->npins; i++) {
const struct sunxi_desc_pin *pin = pctl->desc->pins + i;
+ unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK;
- if (pin->variant && !(pctl->variant & pin->variant))
+ if (pin->variant && !(variant & pin->variant))
continue;
pins[pin_idx++] = pin->pin;
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h
index ad26e4de16a8..0daf7600e2fb 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h
@@ -86,6 +86,8 @@
#define SUN4I_FUNC_INPUT 0
#define SUN4I_FUNC_IRQ 6
+#define SUN4I_FUNC_DISABLED_OLD 7
+#define SUN4I_FUNC_DISABLED_NEW 15
#define SUNXI_PINCTRL_VARIANT_MASK GENMASK(7, 0)
#define SUNXI_PINCTRL_NEW_REG_LAYOUT BIT(8)
@@ -174,7 +176,7 @@ struct sunxi_pinctrl {
unsigned *irq_array;
raw_spinlock_t lock;
struct pinctrl_dev *pctl_dev;
- unsigned long variant;
+ unsigned long flags;
u32 bank_mem_size;
u32 pull_regs_offset;
u32 dlevel_field_width;
diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c
index fa7b3bda688a..bee271a4fda1 100644
--- a/drivers/platform/olpc/olpc-xo175-ec.c
+++ b/drivers/platform/olpc/olpc-xo175-ec.c
@@ -482,7 +482,7 @@ static int olpc_xo175_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *resp,
dev_dbg(dev, "CMD %x, %zd bytes expected\n", cmd, resp_len);
if (inlen > 5) {
- dev_err(dev, "command len %zd too big!\n", resp_len);
+ dev_err(dev, "command len %zd too big!\n", inlen);
return -EOVERFLOW;
}
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c
index 19f82c1d3090..631ffc0978d1 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.c
+++ b/drivers/platform/x86/amd/hsmp/hsmp.c
@@ -117,7 +117,7 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms
}
if (unlikely(mbox_status == HSMP_STATUS_NOT_READY)) {
- dev_err(sock->dev, "Message ID 0x%X failure : SMU tmeout (status = 0x%X)\n",
+ dev_err(sock->dev, "Message ID 0x%X failure : SMU timeout (status = 0x%X)\n",
msg->msg_id, mbox_status);
return -ETIMEDOUT;
} else if (unlikely(mbox_status == HSMP_ERR_INVALID_MSG)) {
diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h
index 208f6fe16168..569743746347 100644
--- a/drivers/platform/x86/asus-armoury.h
+++ b/drivers/platform/x86/asus-armoury.h
@@ -1082,6 +1082,20 @@ static const struct dmi_system_id power_limits[] = {
},
{
.matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "GA503QM"),
+ },
+ .driver_data = &(struct power_data) {
+ .ac_data = &(struct power_limits) {
+ .ppt_pl1_spl_min = 15,
+ .ppt_pl1_spl_def = 35,
+ .ppt_pl1_spl_max = 80,
+ .ppt_pl2_sppt_min = 65,
+ .ppt_pl2_sppt_max = 80,
+ },
+ },
+ },
+ {
+ .matches = {
DMI_MATCH(DMI_BOARD_NAME, "GA503QR"),
},
.driver_data = &(struct power_data) {
@@ -1520,6 +1534,35 @@ static const struct dmi_system_id power_limits[] = {
},
{
.matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "GZ302EA"),
+ },
+ .driver_data = &(struct power_data) {
+ .ac_data = &(struct power_limits) {
+ .ppt_pl1_spl_min = 28,
+ .ppt_pl1_spl_def = 60,
+ .ppt_pl1_spl_max = 80,
+ .ppt_pl2_sppt_min = 32,
+ .ppt_pl2_sppt_def = 75,
+ .ppt_pl2_sppt_max = 92,
+ .ppt_pl3_fppt_min = 45,
+ .ppt_pl3_fppt_def = 86,
+ .ppt_pl3_fppt_max = 93,
+ },
+ .dc_data = &(struct power_limits) {
+ .ppt_pl1_spl_min = 28,
+ .ppt_pl1_spl_def = 45,
+ .ppt_pl1_spl_max = 80,
+ .ppt_pl2_sppt_min = 32,
+ .ppt_pl2_sppt_def = 52,
+ .ppt_pl2_sppt_max = 92,
+ .ppt_pl3_fppt_min = 45,
+ .ppt_pl3_fppt_def = 71,
+ .ppt_pl3_fppt_max = 93,
+ },
+ },
+ },
+ {
+ .matches = {
DMI_MATCH(DMI_BOARD_NAME, "G513I"),
},
.driver_data = &(struct power_data) {
@@ -1598,6 +1641,40 @@ static const struct dmi_system_id power_limits[] = {
},
{
.matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "G614FP"),
+ },
+ .driver_data = &(struct power_data) {
+ .ac_data = &(struct power_limits) {
+ .ppt_pl1_spl_min = 30,
+ .ppt_pl1_spl_max = 120,
+ .ppt_pl2_sppt_min = 65,
+ .ppt_pl2_sppt_def = 140,
+ .ppt_pl2_sppt_max = 165,
+ .ppt_pl3_fppt_min = 65,
+ .ppt_pl3_fppt_def = 140,
+ .ppt_pl3_fppt_max = 165,
+ .nv_temp_target_min = 75,
+ .nv_temp_target_max = 87,
+ .nv_dynamic_boost_min = 5,
+ .nv_dynamic_boost_max = 15,
+ .nv_tgp_min = 50,
+ .nv_tgp_max = 100,
+ },
+ .dc_data = &(struct power_limits) {
+ .ppt_pl1_spl_min = 25,
+ .ppt_pl1_spl_max = 65,
+ .ppt_pl2_sppt_min = 25,
+ .ppt_pl2_sppt_max = 65,
+ .ppt_pl3_fppt_min = 35,
+ .ppt_pl3_fppt_max = 75,
+ .nv_temp_target_min = 75,
+ .nv_temp_target_max = 87,
+ },
+ .requires_fan_curve = true,
+ },
+ },
+ {
+ .matches = {
DMI_MATCH(DMI_BOARD_NAME, "G614J"),
},
.driver_data = &(struct power_data) {
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index a38a65f5c550..b4677c5bba5b 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -548,7 +548,7 @@ static const struct dmi_system_id asus_quirks[] = {
.callback = dmi_matched,
.ident = "ASUS ROG Z13",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUS"),
DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow Z13"),
},
.driver_data = &quirk_asus_z13,
diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c
index 68ede7e5757a..988a0acc9622 100644
--- a/drivers/platform/x86/hp/hp-wmi.c
+++ b/drivers/platform/x86/hp/hp-wmi.c
@@ -120,6 +120,13 @@ static const struct thermal_profile_params omen_v1_thermal_params = {
.ec_tp_offset = HP_VICTUS_S_EC_THERMAL_PROFILE_OFFSET,
};
+static const struct thermal_profile_params omen_v1_legacy_thermal_params = {
+ .performance = HP_OMEN_V1_THERMAL_PROFILE_PERFORMANCE,
+ .balanced = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT,
+ .low_power = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT,
+ .ec_tp_offset = HP_OMEN_EC_THERMAL_PROFILE_OFFSET,
+};
+
/*
* A generic pointer for the currently-active board's thermal profile
* parameters.
@@ -176,6 +183,10 @@ static const char * const victus_thermal_profile_boards[] = {
/* DMI Board names of Victus 16-r and Victus 16-s laptops */
static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst = {
{
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8A4D") },
+ .driver_data = (void *)&omen_v1_legacy_thermal_params,
+ },
+ {
.matches = { DMI_MATCH(DMI_BOARD_NAME, "8BAB") },
.driver_data = (void *)&omen_v1_thermal_params,
},
@@ -184,6 +195,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst
.driver_data = (void *)&victus_s_thermal_params,
},
{
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCA") },
+ .driver_data = (void *)&omen_v1_thermal_params,
+ },
+ {
.matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCD") },
.driver_data = (void *)&omen_v1_thermal_params,
},
@@ -196,6 +211,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst
.driver_data = (void *)&victus_s_thermal_params,
},
{
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C76") },
+ .driver_data = (void *)&omen_v1_thermal_params,
+ },
+ {
.matches = { DMI_MATCH(DMI_BOARD_NAME, "8C78") },
.driver_data = (void *)&omen_v1_thermal_params,
},
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index 95c405c8bac0..2ddd8af8c1ce 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -438,6 +438,14 @@ static int intel_hid_pl_suspend_handler(struct device *device)
return 0;
}
+static int intel_hid_pl_freeze_handler(struct device *device)
+{
+ struct intel_hid_priv *priv = dev_get_drvdata(device);
+
+ priv->wakeup_mode = false;
+ return intel_hid_pl_suspend_handler(device);
+}
+
static int intel_hid_pl_resume_handler(struct device *device)
{
intel_hid_pm_complete(device);
@@ -452,7 +460,7 @@ static int intel_hid_pl_resume_handler(struct device *device)
static const struct dev_pm_ops intel_hid_pl_pm_ops = {
.prepare = intel_hid_pm_prepare,
.complete = intel_hid_pm_complete,
- .freeze = intel_hid_pl_suspend_handler,
+ .freeze = intel_hid_pl_freeze_handler,
.thaw = intel_hid_pl_resume_handler,
.restore = intel_hid_pl_resume_handler,
.suspend = intel_hid_pl_suspend_handler,
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
index b8cdaa233ea9..e238c3105c78 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
@@ -558,6 +558,9 @@ static bool disable_dynamic_sst_features(void)
{
u64 value;
+ if (!static_cpu_has(X86_FEATURE_HWP))
+ return true;
+
rdmsrq(MSR_PM_ENABLE, value);
return !(value & 0x1);
}
@@ -869,7 +872,7 @@ static int isst_if_get_perf_level(void __user *argp)
_read_pp_info("current_level", perf_level.current_level, SST_PP_STATUS_OFFSET,
SST_PP_LEVEL_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE)
_read_pp_info("locked", perf_level.locked, SST_PP_STATUS_OFFSET,
- SST_PP_LOCK_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE)
+ SST_PP_LOCK_START, SST_PP_LOCK_WIDTH, SST_MUL_FACTOR_NONE)
_read_pp_info("feature_state", perf_level.feature_state, SST_PP_STATUS_OFFSET,
SST_PP_FEATURE_STATE_START, SST_PP_FEATURE_STATE_WIDTH, SST_MUL_FACTOR_NONE)
perf_level.enabled = !!(power_domain_info->sst_header.cap_mask & BIT(1));
diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.c b/drivers/platform/x86/lenovo/wmi-gamezone.c
index 381836d29a96..c7fe7e3c9f17 100644
--- a/drivers/platform/x86/lenovo/wmi-gamezone.c
+++ b/drivers/platform/x86/lenovo/wmi-gamezone.c
@@ -31,8 +31,6 @@
#define LWMI_GZ_METHOD_ID_SMARTFAN_SET 44
#define LWMI_GZ_METHOD_ID_SMARTFAN_GET 45
-static BLOCKING_NOTIFIER_HEAD(gz_chain_head);
-
struct lwmi_gz_priv {
enum thermal_mode current_mode;
struct notifier_block event_nb;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index a20fce04fe79..3dd2adda195e 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -4966,7 +4966,8 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt)
switch (mad_status) {
case IBMVFC_MAD_SUCCESS:
ibmvfc_dbg(vhost, "Discover Targets succeeded\n");
- vhost->num_targets = be32_to_cpu(rsp->num_written);
+ vhost->num_targets = min_t(u32, be32_to_cpu(rsp->num_written),
+ max_targets);
ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_ALLOC_TGTS);
break;
case IBMVFC_MAD_FAILED:
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 12124f9d5ccd..13412702188e 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -1734,7 +1734,7 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel,
break;
default:
- if (channel < shost->max_channel) {
+ if (channel <= shost->max_channel) {
res = scsi_scan_host_selected(shost, channel, id, lun,
SCSI_SCAN_MANUAL);
} else {
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index 8e1686358e25..4c348645b04e 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -215,7 +215,7 @@ static unsigned char *ses_get_page2_descriptor(struct enclosure_device *edev,
unsigned char *type_ptr = ses_dev->page1_types;
unsigned char *desc_ptr = ses_dev->page2 + 8;
- if (ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len) < 0)
+ if (ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len))
return NULL;
for (i = 0; i < ses_dev->page1_num_types; i++, type_ptr += 4) {
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index b361c1bb3e43..45390e9b8cae 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -1009,7 +1009,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
enable_irq(irq);
}
- ret = devm_spi_register_controller(&pdev->dev, controller);
+ ret = spi_register_controller(controller);
if (ret < 0) {
dev_err_probe(&pdev->dev, ret, "spi_register_controller error\n");
goto free_dma;
@@ -1035,6 +1035,7 @@ static void fsl_lpspi_remove(struct platform_device *pdev)
struct fsl_lpspi_data *fsl_lpspi =
spi_controller_get_devdata(controller);
+ spi_unregister_controller(controller);
fsl_lpspi_dma_exit(controller);
pm_runtime_dont_use_autosuspend(fsl_lpspi->dev);
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index a7001b9e36e6..57768da3205d 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -1101,8 +1101,6 @@ static void meson_spicc_remove(struct platform_device *pdev)
/* Disable SPI */
writel(0, spicc->base + SPICC_CONREG);
-
- spi_controller_put(spicc->host);
}
static const struct meson_spicc_data meson_spicc_gx_data = {
diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c
index bfcc140df810..3c61c799723b 100644
--- a/drivers/spi/spi-sn-f-ospi.c
+++ b/drivers/spi/spi-sn-f-ospi.c
@@ -612,7 +612,7 @@ static int f_ospi_probe(struct platform_device *pdev)
u32 num_cs = OSPI_NUM_CS;
int ret;
- ctlr = spi_alloc_host(dev, sizeof(*ospi));
+ ctlr = devm_spi_alloc_host(dev, sizeof(*ospi));
if (!ctlr)
return -ENOMEM;
@@ -635,43 +635,22 @@ static int f_ospi_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, ospi);
ospi->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(ospi->base)) {
- ret = PTR_ERR(ospi->base);
- goto err_put_ctlr;
- }
+ if (IS_ERR(ospi->base))
+ return PTR_ERR(ospi->base);
ospi->clk = devm_clk_get_enabled(dev, NULL);
- if (IS_ERR(ospi->clk)) {
- ret = PTR_ERR(ospi->clk);
- goto err_put_ctlr;
- }
+ if (IS_ERR(ospi->clk))
+ return PTR_ERR(ospi->clk);
- mutex_init(&ospi->mlock);
-
- ret = f_ospi_init(ospi);
+ ret = devm_mutex_init(dev, &ospi->mlock);
if (ret)
- goto err_destroy_mutex;
+ return ret;
- ret = devm_spi_register_controller(dev, ctlr);
+ ret = f_ospi_init(ospi);
if (ret)
- goto err_destroy_mutex;
-
- return 0;
-
-err_destroy_mutex:
- mutex_destroy(&ospi->mlock);
-
-err_put_ctlr:
- spi_controller_put(ctlr);
-
- return ret;
-}
-
-static void f_ospi_remove(struct platform_device *pdev)
-{
- struct f_ospi *ospi = platform_get_drvdata(pdev);
+ return ret;
- mutex_destroy(&ospi->mlock);
+ return devm_spi_register_controller(dev, ctlr);
}
static const struct of_device_id f_ospi_dt_ids[] = {
@@ -686,7 +665,6 @@ static struct platform_driver f_ospi_driver = {
.of_match_table = f_ospi_dt_ids,
},
.probe = f_ospi_probe,
- .remove = f_ospi_remove,
};
module_platform_driver(f_ospi_driver);
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 53dee314d76a..9b1125556d29 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -50,7 +50,6 @@ static void spidev_release(struct device *dev)
struct spi_device *spi = to_spi_device(dev);
spi_controller_put(spi->controller);
- kfree(spi->driver_override);
free_percpu(spi->pcpu_statistics);
kfree(spi);
}
@@ -73,10 +72,9 @@ static ssize_t driver_override_store(struct device *dev,
struct device_attribute *a,
const char *buf, size_t count)
{
- struct spi_device *spi = to_spi_device(dev);
int ret;
- ret = driver_set_override(dev, &spi->driver_override, buf, count);
+ ret = __device_set_driver_override(dev, buf, count);
if (ret)
return ret;
@@ -86,13 +84,8 @@ static ssize_t driver_override_store(struct device *dev,
static ssize_t driver_override_show(struct device *dev,
struct device_attribute *a, char *buf)
{
- const struct spi_device *spi = to_spi_device(dev);
- ssize_t len;
-
- device_lock(dev);
- len = sysfs_emit(buf, "%s\n", spi->driver_override ? : "");
- device_unlock(dev);
- return len;
+ guard(spinlock)(&dev->driver_override.lock);
+ return sysfs_emit(buf, "%s\n", dev->driver_override.name ?: "");
}
static DEVICE_ATTR_RW(driver_override);
@@ -376,10 +369,12 @@ static int spi_match_device(struct device *dev, const struct device_driver *drv)
{
const struct spi_device *spi = to_spi_device(dev);
const struct spi_driver *sdrv = to_spi_driver(drv);
+ int ret;
/* Check override first, and if set, only use the named driver */
- if (spi->driver_override)
- return strcmp(spi->driver_override, drv->name) == 0;
+ ret = device_match_driver_override(dev, drv);
+ if (ret >= 0)
+ return ret;
/* Attempt an OF style match */
if (of_driver_match_device(dev, drv))
@@ -3539,8 +3534,19 @@ int devm_spi_register_controller(struct device *dev,
if (ret)
return ret;
- return devm_add_action_or_reset(dev, devm_spi_unregister_controller, ctlr);
+ /*
+ * Prevent controller from being freed by spi_unregister_controller()
+ * if devm_add_action_or_reset() fails for a non-devres allocated
+ * controller.
+ */
+ spi_controller_get(ctlr);
+
+ ret = devm_add_action_or_reset(dev, devm_spi_unregister_controller, ctlr);
+ if (ret == 0 || ctlr->devm_allocated)
+ spi_controller_put(ctlr);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(devm_spi_register_controller);
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index d668bd19fd4a..528883d989b8 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/configfs.h>
+#include <linux/blk-mq.h>
#include <scsi/scsi.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsi_host.h>
@@ -269,15 +270,27 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc)
return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
}
+static bool tcm_loop_flush_work_iter(struct request *rq, void *data)
+{
+ struct scsi_cmnd *sc = blk_mq_rq_to_pdu(rq);
+ struct tcm_loop_cmd *tl_cmd = scsi_cmd_priv(sc);
+ struct se_cmd *se_cmd = &tl_cmd->tl_se_cmd;
+
+ flush_work(&se_cmd->work);
+ return true;
+}
+
static int tcm_loop_target_reset(struct scsi_cmnd *sc)
{
struct tcm_loop_hba *tl_hba;
struct tcm_loop_tpg *tl_tpg;
+ struct Scsi_Host *sh = sc->device->host;
+ int ret;
/*
* Locate the tcm_loop_hba_t pointer
*/
- tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
+ tl_hba = *(struct tcm_loop_hba **)shost_priv(sh);
if (!tl_hba) {
pr_err("Unable to perform device reset without active I_T Nexus\n");
return FAILED;
@@ -286,11 +299,38 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc)
* Locate the tl_tpg pointer from TargetID in sc->device->id
*/
tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
- if (tl_tpg) {
- tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE;
- return SUCCESS;
- }
- return FAILED;
+ if (!tl_tpg)
+ return FAILED;
+
+ /*
+ * Issue a LUN_RESET to drain all commands that the target core
+ * knows about. This handles commands not yet marked CMD_T_COMPLETE.
+ */
+ ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, 0, TMR_LUN_RESET);
+ if (ret != TMR_FUNCTION_COMPLETE)
+ return FAILED;
+
+ /*
+ * Flush any deferred target core completion work that may still be
+ * queued. Commands that already had CMD_T_COMPLETE set before the TMR
+ * are skipped by the TMR drain, but their async completion work
+ * (transport_lun_remove_cmd → percpu_ref_put, release_cmd → scsi_done)
+ * may still be pending in target_completion_wq.
+ *
+ * The SCSI EH will reuse in-flight scsi_cmnd structures for recovery
+ * commands (e.g. TUR) immediately after this handler returns SUCCESS —
+ * if deferred work is still pending, the memset in queuecommand would
+ * zero the se_cmd while the work accesses it, leaking the LUN
+ * percpu_ref and hanging configfs unlink forever.
+ *
+ * Use blk_mq_tagset_busy_iter() to find all started requests and
+ * flush_work() on each — the same pattern used by mpi3mr, scsi_debug,
+ * and other SCSI drivers to drain outstanding commands during reset.
+ */
+ blk_mq_tagset_busy_iter(&sh->tag_set, tcm_loop_flush_work_iter, NULL);
+
+ tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE;
+ return SUCCESS;
}
static const struct scsi_host_template tcm_loop_driver_template = {
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 3ae1f7137d9d..3d593af30aa5 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -276,7 +276,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
ssize_t len = 0;
int ret = 0, i;
- aio_cmd = kmalloc_flex(*aio_cmd, bvecs, sgl_nents);
+ aio_cmd = kzalloc_flex(*aio_cmd, bvecs, sgl_nents);
if (!aio_cmd)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c
index 49ff3bae7271..91f291627132 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c
@@ -176,15 +176,21 @@ static inline void write_soc_slider(struct proc_thermal_device *proc_priv, u64 v
static void set_soc_power_profile(struct proc_thermal_device *proc_priv, int slider)
{
+ u8 offset;
u64 val;
val = read_soc_slider(proc_priv);
val &= ~SLIDER_MASK;
val |= FIELD_PREP(SLIDER_MASK, slider) | BIT(SLIDER_ENABLE_BIT);
+ if (slider == SOC_SLIDER_VALUE_MINIMUM || slider == SOC_SLIDER_VALUE_MAXIMUM)
+ offset = 0;
+ else
+ offset = slider_offset;
+
/* Set the slider offset from module params */
val &= ~SLIDER_OFFSET_MASK;
- val |= FIELD_PREP(SLIDER_OFFSET_MASK, slider_offset);
+ val |= FIELD_PREP(SLIDER_OFFSET_MASK, offset);
write_soc_slider(proc_priv, val);
}
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 478beafc6ac3..b1d658b8f7b5 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -301,11 +301,10 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
*/
ret = dma_buf_fd(priv->dmabuf, get_dma_buf.open_flags);
if (ret < 0)
- goto err_dma_buf;
+ dma_buf_put(priv->dmabuf);
+
return ret;
-err_dma_buf:
- dma_buf_put(priv->dmabuf);
err_dev_put:
vfio_device_put_registration(&vdev->vdev);
err_free_phys:
diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c
index 4252b147593a..7cee97559ba2 100644
--- a/drivers/virt/coco/tdx-guest/tdx-guest.c
+++ b/drivers/virt/coco/tdx-guest/tdx-guest.c
@@ -171,6 +171,8 @@ static void tdx_mr_deinit(const struct attribute_group *mr_grp)
#define GET_QUOTE_SUCCESS 0
#define GET_QUOTE_IN_FLIGHT 0xffffffffffffffff
+#define TDX_QUOTE_MAX_LEN (GET_QUOTE_BUF_SIZE - sizeof(struct tdx_quote_buf))
+
/* struct tdx_quote_buf: Format of Quote request buffer.
* @version: Quote format version, filled by TD.
* @status: Status code of Quote request, filled by VMM.
@@ -269,6 +271,7 @@ static int tdx_report_new_locked(struct tsm_report *report, void *data)
u8 *buf;
struct tdx_quote_buf *quote_buf = quote_data;
struct tsm_report_desc *desc = &report->desc;
+ u32 out_len;
int ret;
u64 err;
@@ -306,12 +309,17 @@ static int tdx_report_new_locked(struct tsm_report *report, void *data)
return ret;
}
- buf = kvmemdup(quote_buf->data, quote_buf->out_len, GFP_KERNEL);
+ out_len = READ_ONCE(quote_buf->out_len);
+
+ if (out_len > TDX_QUOTE_MAX_LEN)
+ return -EFBIG;
+
+ buf = kvmemdup(quote_buf->data, out_len, GFP_KERNEL);
if (!buf)
return -ENOMEM;
report->outblob = buf;
- report->outblob_len = quote_buf->out_len;
+ report->outblob_len = out_len;
/*
* TODO: parse the PEM-formatted cert chain out of the quote buffer when
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 335692d41617..fbca7ce1c6bf 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2912,10 +2912,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
* @data: the token identifying the buffer.
* @gfp: how to do memory allocations (if necessary).
*
- * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate
- * that the CPU will not dirty any cacheline overlapping this buffer while it
- * is available, and to suppress overlapping cacheline warnings in DMA debug
- * builds.
+ * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
+ * to indicate that the CPU will not dirty any cacheline overlapping this buffer
+ * while it is available, and to suppress overlapping cacheline warnings in DMA
+ * debug builds.
*
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
@@ -2928,7 +2928,7 @@ int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
gfp_t gfp)
{
return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp,
- DMA_ATTR_CPU_CACHE_CLEAN);
+ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 1759cc18753f..15ba592236e8 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -12,6 +12,7 @@
#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/kernel.h>
+#include <linux/kstrtox.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/poll.h>
@@ -30,7 +31,10 @@
#include <linux/seq_file.h>
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/security.h>
#include <linux/virtio_mmio.h>
+#include <linux/wait.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
@@ -46,6 +50,7 @@
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>
+#include <xen/xenbus.h>
#ifdef CONFIG_XEN_ACPI
#include <xen/acpi.h>
#endif
@@ -68,10 +73,20 @@ module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
MODULE_PARM_DESC(dm_op_buf_max_size,
"Maximum size of a dm_op hypercall buffer");
+static bool unrestricted;
+module_param(unrestricted, bool, 0);
+MODULE_PARM_DESC(unrestricted,
+ "Don't restrict hypercalls to target domain if running in a domU");
+
struct privcmd_data {
domid_t domid;
};
+/* DOMID_INVALID implies no restriction */
+static domid_t target_domain = DOMID_INVALID;
+static bool restrict_wait;
+static DECLARE_WAIT_QUEUE_HEAD(restrict_wait_wq);
+
static int privcmd_vma_range_is_mapped(
struct vm_area_struct *vma,
unsigned long addr,
@@ -1563,13 +1578,16 @@ static long privcmd_ioctl(struct file *file,
static int privcmd_open(struct inode *ino, struct file *file)
{
- struct privcmd_data *data = kzalloc_obj(*data);
+ struct privcmd_data *data;
+ if (wait_event_interruptible(restrict_wait_wq, !restrict_wait) < 0)
+ return -EINTR;
+
+ data = kzalloc_obj(*data);
if (!data)
return -ENOMEM;
- /* DOMID_INVALID implies no restriction */
- data->domid = DOMID_INVALID;
+ data->domid = target_domain;
file->private_data = data;
return 0;
@@ -1662,6 +1680,52 @@ static struct miscdevice privcmd_dev = {
.fops = &xen_privcmd_fops,
};
+static int init_restrict(struct notifier_block *notifier,
+ unsigned long event,
+ void *data)
+{
+ char *target;
+ unsigned int domid;
+
+ /* Default to an guaranteed unused domain-id. */
+ target_domain = DOMID_IDLE;
+
+ target = xenbus_read(XBT_NIL, "target", "", NULL);
+ if (IS_ERR(target) || kstrtouint(target, 10, &domid)) {
+ pr_err("No target domain found, blocking all hypercalls\n");
+ goto out;
+ }
+
+ target_domain = domid;
+
+ out:
+ if (!IS_ERR(target))
+ kfree(target);
+
+ restrict_wait = false;
+ wake_up_all(&restrict_wait_wq);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block xenstore_notifier = {
+ .notifier_call = init_restrict,
+};
+
+static void __init restrict_driver(void)
+{
+ if (unrestricted) {
+ if (security_locked_down(LOCKDOWN_XEN_USER_ACTIONS))
+ pr_warn("Kernel is locked down, parameter \"unrestricted\" ignored\n");
+ else
+ return;
+ }
+
+ restrict_wait = true;
+
+ register_xenstore_notifier(&xenstore_notifier);
+}
+
static int __init privcmd_init(void)
{
int err;
@@ -1669,6 +1733,9 @@ static int __init privcmd_init(void)
if (!xen_domain())
return -ENODEV;
+ if (!xen_initial_domain())
+ restrict_driver();
+
err = misc_register(&privcmd_dev);
if (err != 0) {
pr_err("Could not register Xen privcmd device\n");
@@ -1698,6 +1765,9 @@ err_privcmdbuf:
static void __exit privcmd_exit(void)
{
+ if (!xen_initial_domain())
+ unregister_xenstore_notifier(&xenstore_notifier);
+
privcmd_ioeventfd_exit();
privcmd_irqfd_exit();
misc_deregister(&privcmd_dev);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index ebf5079096af..c0d17a369bda 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -4583,7 +4583,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info)
for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) {
if (space_info->sub_group[i]) {
check_removing_space_info(space_info->sub_group[i]);
- kfree(space_info->sub_group[i]);
+ btrfs_sysfs_remove_space_info(space_info->sub_group[i]);
space_info->sub_group[i] = NULL;
}
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 01f2dbb69832..1b0eb246b714 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2531,8 +2531,8 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
if (mirror_num >= 0 &&
btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) {
- btrfs_err(fs_info, "super offset mismatch %llu != %u",
- btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
+ btrfs_err(fs_info, "super offset mismatch %llu != %llu",
+ btrfs_super_bytenr(sb), btrfs_sb_offset(mirror_num));
ret = -EINVAL;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 85ee5c79759d..098e64106d02 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -495,7 +495,7 @@ again:
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != bytenr ||
key.type != BTRFS_EXTENT_DATA_REF_KEY)
- return ret;
+ return -ENOENT;
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_data_ref);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ab0d460c7139..ac871efb9763 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4616,21 +4616,32 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode, bool log_inode_only,
u64 logged_isize)
{
+ u64 gen = BTRFS_I(inode)->generation;
u64 flags;
if (log_inode_only) {
- /* set the generation to zero so the recover code
- * can tell the difference between an logging
- * just to say 'this inode exists' and a logging
- * to say 'update this inode with these values'
+ /*
+ * Set the generation to zero so the recover code can tell the
+ * difference between a logging just to say 'this inode exists'
+ * and a logging to say 'update this inode with these values'.
+ * But only if the inode was not already logged before.
+ * We access ->logged_trans directly since it was already set
+ * up in the call chain by btrfs_log_inode(), and data_race()
+ * to avoid false alerts from KCSAN and since it was set already
+ * and one can set it to 0 since that only happens on eviction
+ * and we are holding a ref on the inode.
*/
- btrfs_set_inode_generation(leaf, item, 0);
+ ASSERT(data_race(BTRFS_I(inode)->logged_trans) > 0);
+ if (data_race(BTRFS_I(inode)->logged_trans) < trans->transid)
+ gen = 0;
+
btrfs_set_inode_size(leaf, item, logged_isize);
} else {
- btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
btrfs_set_inode_size(leaf, item, inode->i_size);
}
+ btrfs_set_inode_generation(leaf, item, gen);
+
btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
btrfs_set_inode_mode(leaf, item, inode->i_mode);
@@ -5448,42 +5459,63 @@ process:
return 0;
}
-static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
- struct btrfs_path *path, u64 *size_ret)
+static int get_inode_size_to_log(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path, u64 *size_ret)
{
struct btrfs_key key;
+ struct btrfs_inode_item *item;
int ret;
key.objectid = btrfs_ino(inode);
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
- if (ret < 0) {
- return ret;
- } else if (ret > 0) {
- *size_ret = 0;
- } else {
- struct btrfs_inode_item *item;
+ /*
+ * Our caller called inode_logged(), so logged_trans is up to date.
+ * Use data_race() to silence any warning from KCSAN. Once logged_trans
+ * is set, it can only be reset to 0 after inode eviction.
+ */
+ if (data_race(inode->logged_trans) == trans->transid) {
+ ret = btrfs_search_slot(NULL, inode->root->log_root, &key, path, 0, 0);
+ } else if (inode->generation < trans->transid) {
+ path->search_commit_root = true;
+ path->skip_locking = true;
+ ret = btrfs_search_slot(NULL, inode->root, &key, path, 0, 0);
+ path->search_commit_root = false;
+ path->skip_locking = false;
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_inode_item);
- *size_ret = btrfs_inode_size(path->nodes[0], item);
- /*
- * If the in-memory inode's i_size is smaller then the inode
- * size stored in the btree, return the inode's i_size, so
- * that we get a correct inode size after replaying the log
- * when before a power failure we had a shrinking truncate
- * followed by addition of a new name (rename / new hard link).
- * Otherwise return the inode size from the btree, to avoid
- * data loss when replaying a log due to previously doing a
- * write that expands the inode's size and logging a new name
- * immediately after.
- */
- if (*size_ret > inode->vfs_inode.i_size)
- *size_ret = inode->vfs_inode.i_size;
+ } else {
+ *size_ret = 0;
+ return 0;
}
+ /*
+ * If the inode was logged before or is from a past transaction, then
+ * its inode item must exist in the log root or in the commit root.
+ */
+ ASSERT(ret <= 0);
+ if (WARN_ON_ONCE(ret > 0))
+ ret = -ENOENT;
+
+ if (ret < 0)
+ return ret;
+
+ item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_item);
+ *size_ret = btrfs_inode_size(path->nodes[0], item);
+ /*
+ * If the in-memory inode's i_size is smaller then the inode size stored
+ * in the btree, return the inode's i_size, so that we get a correct
+ * inode size after replaying the log when before a power failure we had
+ * a shrinking truncate followed by addition of a new name (rename / new
+ * hard link). Otherwise return the inode size from the btree, to avoid
+ * data loss when replaying a log due to previously doing a write that
+ * expands the inode's size and logging a new name immediately after.
+ */
+ if (*size_ret > inode->vfs_inode.i_size)
+ *size_ret = inode->vfs_inode.i_size;
+
btrfs_release_path(path);
return 0;
}
@@ -6996,7 +7028,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
ret = drop_inode_items(trans, log, path, inode,
BTRFS_XATTR_ITEM_KEY);
} else {
- if (inode_only == LOG_INODE_EXISTS && ctx->logged_before) {
+ if (inode_only == LOG_INODE_EXISTS) {
/*
* Make sure the new inode item we write to the log has
* the same isize as the current one (if it exists).
@@ -7010,7 +7042,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* (zeroes), as if an expanding truncate happened,
* instead of getting a file of 4Kb only.
*/
- ret = logged_inode_size(log, inode, path, &logged_isize);
+ ret = get_inode_size_to_log(trans, inode, path, &logged_isize);
if (ret)
goto out_unlock;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ab51e6ecfdef..6b8e810a35ce 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -8099,8 +8099,9 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
smp_rmb();
ret = update_dev_stat_item(trans, device);
- if (!ret)
- atomic_sub(stats_cnt, &device->dev_stats_ccnt);
+ if (ret)
+ break;
+ atomic_sub(stats_cnt, &device->dev_stats_ccnt);
}
mutex_unlock(&fs_devices->device_list_mutex);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 1b7544aa88a2..c676e715b4f8 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -308,7 +308,9 @@ int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
}
/* Queue the remaining part of the folio. */
if (workspace->strm.total_out > bio->bi_iter.bi_size) {
- u32 cur_len = offset_in_folio(out_folio, workspace->strm.total_out);
+ const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size;
+
+ ASSERT(cur_len <= folio_size(out_folio));
if (!bio_add_folio(bio, out_folio, cur_len, 0)) {
ret = -E2BIG;
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index a9f645f57bb2..97c48ebe8458 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -16,22 +16,36 @@ config EROFS_FS
select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE
select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD
help
- EROFS (Enhanced Read-Only File System) is a lightweight read-only
- file system with modern designs (e.g. no buffer heads, inline
- xattrs/data, chunk-based deduplication, multiple devices, etc.) for
- scenarios which need high-performance read-only solutions, e.g.
- smartphones with Android OS, LiveCDs and high-density hosts with
- numerous containers;
-
- It also provides transparent compression and deduplication support to
- improve storage density and maintain relatively high compression
- ratios, and it implements in-place decompression to temporarily reuse
- page cache for compressed data using proper strategies, which is
- quite useful for ensuring guaranteed end-to-end runtime decompression
+ EROFS (Enhanced Read-Only File System) is a modern, lightweight,
+ secure read-only filesystem for various use cases, such as immutable
+ system images, container images, application sandboxes, and datasets.
+
+ EROFS uses a flexible, hierarchical on-disk design so that features
+ can be enabled on demand: the core on-disk format is block-aligned in
+ order to perform optimally on all kinds of devices, including block
+ and memory-backed devices; the format is easy to parse and has zero
+ metadata redundancy, unlike generic filesystems, making it ideal for
+ filesystem auditing and remote access; inline data, random-access
+ friendly directory data, inline/shared extended attributes and
+ chunk-based deduplication ensure space efficiency while maintaining
+ high performance.
+
+ Optionally, it supports multiple devices to reference external data,
+ enabling data sharing for container images.
+
+ It also has advanced encoded on-disk layouts, particularly for data
+ compression and fine-grained deduplication. It utilizes fixed-size
+ output compression to improve storage density while keeping relatively
+ high compression ratios. Furthermore, it implements in-place
+ decompression to reuse file pages to keep compressed data temporarily
+ with proper strategies, which ensures guaranteed end-to-end runtime
performance under extreme memory pressure without extra cost.
- See the documentation at <file:Documentation/filesystems/erofs.rst>
- and the web pages at <https://erofs.docs.kernel.org> for more details.
+ For more details, see the web pages at <https://erofs.docs.kernel.org>
+ and the documentation at <file:Documentation/filesystems/erofs.rst>.
+
+ To compile EROFS filesystem support as a module, choose M here. The
+ module will be called erofs.
If unsure, say N.
@@ -105,7 +119,8 @@ config EROFS_FS_ZIP
depends on EROFS_FS
default y
help
- Enable transparent compression support for EROFS file systems.
+ Enable EROFS compression layouts so that filesystems containing
+ compressed files can be parsed by the kernel.
If you don't want to enable compression feature, say N.
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index abe873f01297..98cdaa1cd1a7 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -25,10 +25,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
container_of(iocb, struct erofs_fileio_rq, iocb);
struct folio_iter fi;
- if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) {
- bio_advance(&rq->bio, ret);
- zero_fill_bio(&rq->bio);
- }
+ if (ret >= 0 && ret != rq->bio.bi_iter.bi_size)
+ ret = -EIO;
if (!rq->bio.bi_end_io) {
bio_for_each_folio_all(fi, &rq->bio) {
DBG_BUGON(folio_test_uptodate(fi.folio));
diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
index 829d50d5c717..ec433bacc592 100644
--- a/fs/erofs/ishare.c
+++ b/fs/erofs/ishare.c
@@ -200,8 +200,19 @@ struct inode *erofs_real_inode(struct inode *inode, bool *need_iput)
int __init erofs_init_ishare(void)
{
- erofs_ishare_mnt = kern_mount(&erofs_anon_fs_type);
- return PTR_ERR_OR_ZERO(erofs_ishare_mnt);
+ struct vfsmount *mnt;
+ int ret;
+
+ mnt = kern_mount(&erofs_anon_fs_type);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+ /* generic_fadvise() doesn't work if s_bdi == &noop_backing_dev_info */
+ ret = super_setup_bdi(mnt->mnt_sb);
+ if (ret)
+ kern_unmount(mnt);
+ else
+ erofs_ishare_mnt = mnt;
+ return ret;
}
void erofs_exit_ishare(void)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 3977e42b9516..fe8121df9ef2 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1445,6 +1445,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
int bios)
{
struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+ int gfp_flag;
/* wake up the caller thread for sync decompression */
if (io->sync) {
@@ -1477,7 +1478,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
return;
}
+ gfp_flag = memalloc_noio_save();
z_erofs_decompressqueue_work(&io->u.work);
+ memalloc_noio_restore(gfp_flag);
}
static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 72206a292676..3baee4e7c1cf 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -14,7 +14,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
-ext4-inode-test-objs += inode-test.o
-obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o
+ext4-test-objs += inode-test.o mballoc-test.o \
+ extents-test.o
+obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-test.o
ext4-$(CONFIG_FS_VERITY) += verity.o
ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index cf0a0970c095..f41f320f4437 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -163,10 +163,17 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
*/
if (handle) {
+ /*
+ * Since the inode is new it is ok to pass the
+ * XATTR_CREATE flag. This is necessary to match the
+ * remaining journal credits check in the set_handle
+ * function with the credits allocated for the new
+ * inode.
+ */
res = ext4_xattr_set_handle(handle, inode,
EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
- ctx, len, 0);
+ ctx, len, XATTR_CREATE);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 293f698b7042..7617e2d454ea 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1570,6 +1570,7 @@ struct ext4_sb_info {
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
struct completion s_kobj_unregister;
+ struct mutex s_error_notify_mutex; /* protects sysfs_notify vs kobject_del */
struct super_block *s_sb;
struct buffer_head *s_mmp_bh;
@@ -3944,6 +3945,11 @@ static inline bool ext4_inode_can_atomic_write(struct inode *inode)
extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
loff_t pos, unsigned len,
get_block_t *get_block);
+
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+#define EXPORT_SYMBOL_FOR_EXT4_TEST(sym) \
+ EXPORT_SYMBOL_FOR_MODULES(sym, "ext4-test")
+#endif
#endif /* __KERNEL__ */
#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index c484125d963f..ebaf7cc42430 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -264,5 +264,17 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
0xffff);
}
+extern int __ext4_ext_dirty(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path);
+extern int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex);
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+extern int ext4_ext_space_root_idx_test(struct inode *inode, int check);
+extern struct ext4_ext_path *ext4_split_convert_extents_test(
+ handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path,
+ int flags, unsigned int *allocated);
+#endif
#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c
index 7c4690eb7dad..5496b2c8e2cd 100644
--- a/fs/ext4/extents-test.c
+++ b/fs/ext4/extents-test.c
@@ -142,8 +142,10 @@ static struct file_system_type ext_fs_type = {
static void extents_kunit_exit(struct kunit *test)
{
- struct ext4_sb_info *sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info;
+ struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb;
+ struct ext4_sb_info *sbi = sb->s_fs_info;
+ ext4_es_unregister_shrinker(sbi);
kfree(sbi);
kfree(k_ctx.k_ei);
kfree(k_ctx.k_data);
@@ -280,8 +282,8 @@ static int extents_kunit_init(struct kunit *test)
eh->eh_depth = 0;
eh->eh_entries = cpu_to_le16(1);
eh->eh_magic = EXT4_EXT_MAGIC;
- eh->eh_max =
- cpu_to_le16(ext4_ext_space_root_idx(&k_ctx.k_ei->vfs_inode, 0));
+ eh->eh_max = cpu_to_le16(ext4_ext_space_root_idx_test(
+ &k_ctx.k_ei->vfs_inode, 0));
eh->eh_generation = 0;
/*
@@ -384,8 +386,8 @@ static void test_split_convert(struct kunit *test)
switch (param->type) {
case TEST_SPLIT_CONVERT:
- path = ext4_split_convert_extents(NULL, inode, &map, path,
- param->split_flags, NULL);
+ path = ext4_split_convert_extents_test(NULL, inode, &map,
+ path, param->split_flags, NULL);
break;
case TEST_CREATE_BLOCKS:
ext4_map_create_blocks_helper(test, inode, &map, param->split_flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ae3804f36535..8cce1479be6d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -184,9 +184,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
* - ENOMEM
* - EIO
*/
-static int __ext4_ext_dirty(const char *where, unsigned int line,
- handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
+int __ext4_ext_dirty(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
{
int err;
@@ -1736,6 +1736,13 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path + k);
if (err)
return err;
+ if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "path[%d].p_idx %p > EXT_LAST_INDEX %p",
+ k, path[k].p_idx,
+ EXT_LAST_INDEX(path[k].p_hdr));
+ return -EFSCORRUPTED;
+ }
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k);
if (err)
@@ -1748,6 +1755,14 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path + k);
if (err)
goto clean;
+ if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "path[%d].p_idx %p > EXT_LAST_INDEX %p",
+ k, path[k].p_idx,
+ EXT_LAST_INDEX(path[k].p_hdr));
+ err = -EFSCORRUPTED;
+ goto clean;
+ }
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k);
if (err)
@@ -3144,7 +3159,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
}
/* FIXME!! we need to try to merge to left or right after zero-out */
-static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
@@ -3239,6 +3254,9 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
insert_err = PTR_ERR(path);
err = 0;
+ if (insert_err != -ENOSPC && insert_err != -EDQUOT &&
+ insert_err != -ENOMEM)
+ goto out_path;
/*
* Get a new path to try to zeroout or fix the extent length.
@@ -3255,13 +3273,20 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
goto out_path;
}
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex) {
+ EXT4_ERROR_INODE(inode,
+ "bad extent address lblock: %lu, depth: %d pblock %llu",
+ (unsigned long)ee_block, depth, path[depth].p_block);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
-
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
@@ -3363,7 +3388,7 @@ static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode,
ext4_ext_mark_initialized(ex);
- ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + depth);
if (err)
return err;
@@ -4457,9 +4482,13 @@ got_allocated_blocks:
path = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (IS_ERR(path)) {
err = PTR_ERR(path);
- if (allocated_clusters) {
+ /*
+ * Gracefully handle out of space conditions. If the filesystem
+ * is inconsistent, we'll just leak allocated blocks to avoid
+ * causing even more damage.
+ */
+ if (allocated_clusters && (err == -EDQUOT || err == -ENOSPC)) {
int fb_flags = 0;
-
/*
* free data blocks we just allocated.
* not a good idea to call discard here directly,
@@ -6238,6 +6267,33 @@ out:
return 0;
}
-#ifdef CONFIG_EXT4_KUNIT_TESTS
-#include "extents-test.c"
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+int ext4_ext_space_root_idx_test(struct inode *inode, int check)
+{
+ return ext4_ext_space_root_idx(inode, check);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_space_root_idx_test);
+
+struct ext4_ext_path *ext4_split_convert_extents_test(handle_t *handle,
+ struct inode *inode, struct ext4_map_blocks *map,
+ struct ext4_ext_path *path, int flags,
+ unsigned int *allocated)
+{
+ return ext4_split_convert_extents(handle, inode, map, path,
+ flags, allocated);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_split_convert_extents_test);
+
+EXPORT_SYMBOL_FOR_EXT4_TEST(__ext4_ext_dirty);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_zeroout);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_register_shrinker);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_unregister_shrinker);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_create_blocks);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_init_tree);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_lookup_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_insert_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_insert_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_find_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_issue_zeroout);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_query_blocks);
#endif
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f575751f1cae..2f0057e04934 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -975,13 +975,13 @@ static int ext4_fc_flush_data(journal_t *journal)
int ret = 0;
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
- ret = jbd2_submit_inode_data(journal, ei->jinode);
+ ret = jbd2_submit_inode_data(journal, READ_ONCE(ei->jinode));
if (ret)
return ret;
}
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
- ret = jbd2_wait_inode_data(journal, ei->jinode);
+ ret = jbd2_wait_inode_data(journal, READ_ONCE(ei->jinode));
if (ret)
return ret;
}
@@ -1613,19 +1613,21 @@ static int ext4_fc_replay_inode(struct super_block *sb,
/* Immediately update the inode on disk. */
ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
if (ret)
- goto out;
+ goto out_brelse;
ret = sync_dirty_buffer(iloc.bh);
if (ret)
- goto out;
+ goto out_brelse;
ret = ext4_mark_inode_used(sb, ino);
if (ret)
- goto out;
+ goto out_brelse;
/* Given that we just wrote the inode on disk, this SHOULD succeed. */
inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
ext4_debug("Inode not found.");
- return -EFSCORRUPTED;
+ inode = NULL;
+ ret = -EFSCORRUPTED;
+ goto out_brelse;
}
/*
@@ -1642,13 +1644,14 @@ static int ext4_fc_replay_inode(struct super_block *sb,
ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
sync_dirty_buffer(iloc.bh);
+out_brelse:
brelse(iloc.bh);
out:
iput(inode);
if (!ret)
blkdev_issue_flush(sb->s_bdev);
- return 0;
+ return ret;
}
/*
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index e476c6de3074..bd8f230fa507 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -83,11 +83,23 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
int datasync, bool *needs_barrier)
{
struct inode *inode = file->f_inode;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0,
+ };
int ret;
ret = generic_buffers_fsync_noflush(file, start, end, datasync);
- if (!ret)
- ret = ext4_sync_parent(inode);
+ if (ret)
+ return ret;
+
+ /* Force writeout of inode table buffer to disk */
+ ret = ext4_write_inode(inode, &wbc);
+ if (ret)
+ return ret;
+
+ ret = ext4_sync_parent(inode);
+
if (test_opt(inode->i_sb, BARRIER))
*needs_barrier = true;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b20a1bf866ab..b1bc1950c9f0 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -686,6 +686,12 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
if (unlikely(!gdp))
return 0;
+ /* Inode was never used in this filesystem? */
+ if (ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT) ||
+ ino >= EXT4_INODES_PER_GROUP(sb) - ext4_itable_unused_count(sb, gdp)))
+ return 0;
+
bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
(ino / inodes_per_block));
if (!bh || !buffer_uptodate(bh))
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 1f6bc05593df..408677fa8196 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -522,7 +522,15 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
goto out;
len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
- BUG_ON(len > PAGE_SIZE);
+
+ if (len > PAGE_SIZE) {
+ ext4_error_inode(inode, __func__, __LINE__, 0,
+ "inline size %zu exceeds PAGE_SIZE", len);
+ ret = -EFSCORRUPTED;
+ brelse(iloc.bh);
+ goto out;
+ }
+
kaddr = kmap_local_folio(folio, 0);
ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
kaddr = folio_zero_tail(folio, len, kaddr + len);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 396dc3a5d16b..1123d995494b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -128,6 +128,8 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
+ struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode);
+
trace_ext4_begin_ordered_truncate(inode, new_size);
/*
* If jinode is zero, then we never opened the file for
@@ -135,10 +137,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
* jbd2_journal_begin_ordered_truncate() since there's no
* outstanding writes we need to flush.
*/
- if (!EXT4_I(inode)->jinode)
+ if (!jinode)
return 0;
return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
- EXT4_I(inode)->jinode,
+ jinode,
new_size);
}
@@ -184,6 +186,14 @@ void ext4_evict_inode(struct inode *inode)
if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
+ /*
+ * If there's dirty page will lead to data loss, user
+ * could see stale data.
+ */
+ if (unlikely(!ext4_emergency_state(inode->i_sb) &&
+ mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY)))
+ ext4_warning_inode(inode, "data will be lost");
+
truncate_inode_pages_final(&inode->i_data);
goto no_delete;
@@ -4451,8 +4461,13 @@ int ext4_inode_attach_jinode(struct inode *inode)
spin_unlock(&inode->i_lock);
return -ENOMEM;
}
- ei->jinode = jinode;
- jbd2_journal_init_jbd_inode(ei->jinode, inode);
+ jbd2_journal_init_jbd_inode(jinode, inode);
+ /*
+ * Publish ->jinode only after it is fully initialized so that
+ * readers never observe a partially initialized jbd2_inode.
+ */
+ smp_wmb();
+ WRITE_ONCE(ei->jinode, jinode);
jinode = NULL;
}
spin_unlock(&inode->i_lock);
@@ -5401,18 +5416,36 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
inode->i_op = &ext4_encrypted_symlink_inode_operations;
} else if (ext4_inode_is_fast_symlink(inode)) {
inode->i_op = &ext4_fast_symlink_inode_operations;
- if (inode->i_size == 0 ||
- inode->i_size >= sizeof(ei->i_data) ||
- strnlen((char *)ei->i_data, inode->i_size + 1) !=
- inode->i_size) {
- ext4_error_inode(inode, function, line, 0,
- "invalid fast symlink length %llu",
- (unsigned long long)inode->i_size);
- ret = -EFSCORRUPTED;
- goto bad_inode;
+
+ /*
+ * Orphan cleanup can see inodes with i_size == 0
+ * and i_data uninitialized. Skip size checks in
+ * that case. This is safe because the first thing
+ * ext4_evict_inode() does for fast symlinks is
+ * clearing of i_data and i_size.
+ */
+ if ((EXT4_SB(sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+ if (inode->i_nlink != 0) {
+ ext4_error_inode(inode, function, line, 0,
+ "invalid orphan symlink nlink %d",
+ inode->i_nlink);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+ } else {
+ if (inode->i_size == 0 ||
+ inode->i_size >= sizeof(ei->i_data) ||
+ strnlen((char *)ei->i_data, inode->i_size + 1) !=
+ inode->i_size) {
+ ext4_error_inode(inode, function, line, 0,
+ "invalid fast symlink length %llu",
+ (unsigned long long)inode->i_size);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+ inode_set_cached_link(inode, (char *)ei->i_data,
+ inode->i_size);
}
- inode_set_cached_link(inode, (char *)ei->i_data,
- inode->i_size);
} else {
inode->i_op = &ext4_symlink_inode_operations;
}
@@ -5849,6 +5882,18 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (attr->ia_size == inode->i_size)
inc_ivers = false;
+ /*
+ * If file has inline data but new size exceeds inline capacity,
+ * convert to extent-based storage first to prevent inconsistent
+ * state (inline flag set but size exceeds inline capacity).
+ */
+ if (ext4_has_inline_data(inode) &&
+ attr->ia_size > EXT4_I(inode)->i_inline_size) {
+ error = ext4_convert_inline_data(inode);
+ if (error)
+ goto err_out;
+ }
+
if (shrink) {
if (ext4_should_order_data(inode)) {
error = ext4_begin_ordered_truncate(inode,
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 9fbdf6a09489..6f5bfbb0e8a4 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -8,6 +8,7 @@
#include <linux/random.h>
#include "ext4.h"
+#include "mballoc.h"
struct mbt_grp_ctx {
struct buffer_head bitmap_bh;
@@ -336,7 +337,7 @@ ext4_mb_mark_context_stub(handle_t *handle, struct super_block *sb, bool state,
if (state)
mb_set_bits(bitmap_bh->b_data, blkoff, len);
else
- mb_clear_bits(bitmap_bh->b_data, blkoff, len);
+ mb_clear_bits_test(bitmap_bh->b_data, blkoff, len);
return 0;
}
@@ -413,14 +414,14 @@ static void test_new_blocks_simple(struct kunit *test)
/* get block at goal */
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test, ar.goal, found,
"failed to alloc block at goal, expected %llu found %llu",
ar.goal, found);
/* get block after goal in goal group */
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test, ar.goal + EXT4_C2B(sbi, 1), found,
"failed to alloc block after goal in goal group, expected %llu found %llu",
ar.goal + 1, found);
@@ -428,7 +429,7 @@ static void test_new_blocks_simple(struct kunit *test)
/* get block after goal group */
mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test,
ext4_group_first_block_no(sb, goal_group + 1), found,
"failed to alloc block after goal group, expected %llu found %llu",
@@ -438,7 +439,7 @@ static void test_new_blocks_simple(struct kunit *test)
for (i = goal_group; i < ext4_get_groups_count(sb); i++)
mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test,
ext4_group_first_block_no(sb, 0) + EXT4_C2B(sbi, 1), found,
"failed to alloc block before goal group, expected %llu found %llu",
@@ -448,7 +449,7 @@ static void test_new_blocks_simple(struct kunit *test)
for (i = 0; i < ext4_get_groups_count(sb); i++)
mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_NE_MSG(test, err, 0,
"unexpectedly get block when no block is available");
}
@@ -492,16 +493,16 @@ validate_free_blocks_simple(struct kunit *test, struct super_block *sb,
continue;
bitmap = mbt_ctx_bitmap(sb, i);
- bit = mb_find_next_zero_bit(bitmap, max, 0);
+ bit = mb_find_next_zero_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ_MSG(test, bit, max,
"free block on unexpected group %d", i);
}
bitmap = mbt_ctx_bitmap(sb, goal_group);
- bit = mb_find_next_zero_bit(bitmap, max, 0);
+ bit = mb_find_next_zero_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ(test, bit, start);
- bit = mb_find_next_bit(bitmap, max, bit + 1);
+ bit = mb_find_next_bit_test(bitmap, max, bit + 1);
KUNIT_ASSERT_EQ(test, bit, start + len);
}
@@ -524,7 +525,7 @@ test_free_blocks_simple_range(struct kunit *test, ext4_group_t goal_group,
block = ext4_group_first_block_no(sb, goal_group) +
EXT4_C2B(sbi, start);
- ext4_free_blocks_simple(inode, block, len);
+ ext4_free_blocks_simple_test(inode, block, len);
validate_free_blocks_simple(test, sb, goal_group, start, len);
mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb));
}
@@ -566,15 +567,15 @@ test_mark_diskspace_used_range(struct kunit *test,
bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP);
memset(bitmap, 0, sb->s_blocksize);
- ret = ext4_mb_mark_diskspace_used(ac, NULL);
+ ret = ext4_mb_mark_diskspace_used_test(ac, NULL);
KUNIT_ASSERT_EQ(test, ret, 0);
max = EXT4_CLUSTERS_PER_GROUP(sb);
- i = mb_find_next_bit(bitmap, max, 0);
+ i = mb_find_next_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ(test, i, start);
- i = mb_find_next_zero_bit(bitmap, max, i + 1);
+ i = mb_find_next_zero_bit_test(bitmap, max, i + 1);
KUNIT_ASSERT_EQ(test, i, start + len);
- i = mb_find_next_bit(bitmap, max, i + 1);
+ i = mb_find_next_bit_test(bitmap, max, i + 1);
KUNIT_ASSERT_EQ(test, max, i);
}
@@ -617,54 +618,54 @@ static void mbt_generate_buddy(struct super_block *sb, void *buddy,
max = EXT4_CLUSTERS_PER_GROUP(sb);
bb_h = buddy + sbi->s_mb_offsets[1];
- off = mb_find_next_zero_bit(bb, max, 0);
+ off = mb_find_next_zero_bit_test(bb, max, 0);
grp->bb_first_free = off;
while (off < max) {
grp->bb_counters[0]++;
grp->bb_free++;
- if (!(off & 1) && !mb_test_bit(off + 1, bb)) {
+ if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) {
grp->bb_free++;
grp->bb_counters[0]--;
- mb_clear_bit(off >> 1, bb_h);
+ mb_clear_bit_test(off >> 1, bb_h);
grp->bb_counters[1]++;
grp->bb_largest_free_order = 1;
off++;
}
- off = mb_find_next_zero_bit(bb, max, off + 1);
+ off = mb_find_next_zero_bit_test(bb, max, off + 1);
}
for (order = 1; order < MB_NUM_ORDERS(sb) - 1; order++) {
bb = buddy + sbi->s_mb_offsets[order];
bb_h = buddy + sbi->s_mb_offsets[order + 1];
max = max >> 1;
- off = mb_find_next_zero_bit(bb, max, 0);
+ off = mb_find_next_zero_bit_test(bb, max, 0);
while (off < max) {
- if (!(off & 1) && !mb_test_bit(off + 1, bb)) {
+ if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) {
mb_set_bits(bb, off, 2);
grp->bb_counters[order] -= 2;
- mb_clear_bit(off >> 1, bb_h);
+ mb_clear_bit_test(off >> 1, bb_h);
grp->bb_counters[order + 1]++;
grp->bb_largest_free_order = order + 1;
off++;
}
- off = mb_find_next_zero_bit(bb, max, off + 1);
+ off = mb_find_next_zero_bit_test(bb, max, off + 1);
}
}
max = EXT4_CLUSTERS_PER_GROUP(sb);
- off = mb_find_next_zero_bit(bitmap, max, 0);
+ off = mb_find_next_zero_bit_test(bitmap, max, 0);
while (off < max) {
grp->bb_fragments++;
- off = mb_find_next_bit(bitmap, max, off + 1);
+ off = mb_find_next_bit_test(bitmap, max, off + 1);
if (off + 1 >= max)
break;
- off = mb_find_next_zero_bit(bitmap, max, off + 1);
+ off = mb_find_next_zero_bit_test(bitmap, max, off + 1);
}
}
@@ -706,7 +707,7 @@ do_test_generate_buddy(struct kunit *test, struct super_block *sb, void *bitmap,
/* needed by validation in ext4_mb_generate_buddy */
ext4_grp->bb_free = mbt_grp->bb_free;
memset(ext4_buddy, 0xff, sb->s_blocksize);
- ext4_mb_generate_buddy(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP,
+ ext4_mb_generate_buddy_test(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP,
ext4_grp);
KUNIT_ASSERT_EQ(test, memcmp(mbt_buddy, ext4_buddy, sb->s_blocksize),
@@ -760,7 +761,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b,
ex.fe_group = TEST_GOAL_GROUP;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(e4b, &ex);
+ mb_mark_used_test(e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
mb_set_bits(bitmap, start, len);
@@ -769,7 +770,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b,
memset(buddy, 0xff, sb->s_blocksize);
for (i = 0; i < MB_NUM_ORDERS(sb); i++)
grp->bb_counters[i] = 0;
- ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp);
+ ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp);
KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize),
0);
@@ -798,7 +799,7 @@ static void test_mb_mark_used(struct kunit *test)
bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp);
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb);
@@ -809,7 +810,7 @@ static void test_mb_mark_used(struct kunit *test)
test_mb_mark_used_range(test, &e4b, ranges[i].start,
ranges[i].len, bitmap, buddy, grp);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
static void
@@ -825,16 +826,16 @@ test_mb_free_blocks_range(struct kunit *test, struct ext4_buddy *e4b,
return;
ext4_lock_group(sb, e4b->bd_group);
- mb_free_blocks(NULL, e4b, start, len);
+ mb_free_blocks_test(NULL, e4b, start, len);
ext4_unlock_group(sb, e4b->bd_group);
- mb_clear_bits(bitmap, start, len);
+ mb_clear_bits_test(bitmap, start, len);
/* bypass bb_free validatoin in ext4_mb_generate_buddy */
grp->bb_free += len;
memset(buddy, 0xff, sb->s_blocksize);
for (i = 0; i < MB_NUM_ORDERS(sb); i++)
grp->bb_counters[i] = 0;
- ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp);
+ ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp);
KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize),
0);
@@ -865,7 +866,7 @@ static void test_mb_free_blocks(struct kunit *test)
bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp);
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
ex.fe_start = 0;
@@ -873,7 +874,7 @@ static void test_mb_free_blocks(struct kunit *test)
ex.fe_group = TEST_GOAL_GROUP;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(&e4b, &ex);
+ mb_mark_used_test(&e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
grp->bb_free = 0;
@@ -886,7 +887,7 @@ static void test_mb_free_blocks(struct kunit *test)
test_mb_free_blocks_range(test, &e4b, ranges[i].start,
ranges[i].len, bitmap, buddy, grp);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
#define COUNT_FOR_ESTIMATE 100000
@@ -904,7 +905,7 @@ static void test_mb_mark_used_cost(struct kunit *test)
if (sb->s_blocksize > PAGE_SIZE)
kunit_skip(test, "blocksize exceeds pagesize");
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
ex.fe_group = TEST_GOAL_GROUP;
@@ -918,7 +919,7 @@ static void test_mb_mark_used_cost(struct kunit *test)
ex.fe_start = ranges[i].start;
ex.fe_len = ranges[i].len;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(&e4b, &ex);
+ mb_mark_used_test(&e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
end = jiffies;
@@ -929,14 +930,14 @@ static void test_mb_mark_used_cost(struct kunit *test)
continue;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_free_blocks(NULL, &e4b, ranges[i].start,
+ mb_free_blocks_test(NULL, &e4b, ranges[i].start,
ranges[i].len);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
}
kunit_info(test, "costed jiffies %lu\n", all);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 20e9fdaf4301..bb58eafb87bc 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1199,6 +1199,8 @@ static int ext4_mb_scan_groups(struct ext4_allocation_context *ac)
/* searching for the right group start from the goal value specified */
start = ac->ac_g_ex.fe_group;
+ if (start >= ngroups)
+ start = 0;
ac->ac_prefetch_grp = start;
ac->ac_prefetch_nr = 0;
@@ -2443,8 +2445,12 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
return 0;
err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
- if (err)
+ if (err) {
+ if (EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info) &&
+ !(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
+ return 0;
return err;
+ }
ext4_lock_group(ac->ac_sb, group);
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
@@ -3580,9 +3586,7 @@ err_freebuddy:
rcu_read_unlock();
iput(sbi->s_buddy_cache);
err_freesgi:
- rcu_read_lock();
- kvfree(rcu_dereference(sbi->s_group_info));
- rcu_read_unlock();
+ kvfree(rcu_access_pointer(sbi->s_group_info));
return -ENOMEM;
}
@@ -3889,15 +3893,14 @@ void ext4_mb_release(struct super_block *sb)
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
int count;
- if (test_opt(sb, DISCARD)) {
- /*
- * wait the discard work to drain all of ext4_free_data
- */
- flush_work(&sbi->s_discard_work);
- WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
- }
+ /*
+ * wait the discard work to drain all of ext4_free_data
+ */
+ flush_work(&sbi->s_discard_work);
+ WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
- if (sbi->s_group_info) {
+ group_info = rcu_access_pointer(sbi->s_group_info);
+ if (group_info) {
for (i = 0; i < ngroups; i++) {
cond_resched();
grinfo = ext4_get_group_info(sb, i);
@@ -3915,12 +3918,9 @@ void ext4_mb_release(struct super_block *sb)
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
- rcu_read_lock();
- group_info = rcu_dereference(sbi->s_group_info);
for (i = 0; i < num_meta_group_infos; i++)
kfree(group_info[i]);
kvfree(group_info);
- rcu_read_unlock();
}
ext4_mb_avg_fragment_size_destroy(sbi);
ext4_mb_largest_free_orders_destroy(sbi);
@@ -4084,7 +4084,7 @@ void ext4_exit_mballoc(void)
#define EXT4_MB_BITMAP_MARKED_CHECK 0x0001
#define EXT4_MB_SYNC_UPDATE 0x0002
-static int
+int
ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state,
ext4_group_t group, ext4_grpblk_t blkoff,
ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed)
@@ -7188,6 +7188,102 @@ out_unload:
return error;
}
-#ifdef CONFIG_EXT4_KUNIT_TESTS
-#include "mballoc-test.c"
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+void mb_clear_bits_test(void *bm, int cur, int len)
+{
+ mb_clear_bits(bm, cur, len);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bits_test);
+
+ext4_fsblk_t
+ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar,
+ int *errp)
+{
+ return ext4_mb_new_blocks_simple(ar, errp);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_new_blocks_simple_test);
+
+int mb_find_next_zero_bit_test(void *addr, int max, int start)
+{
+ return mb_find_next_zero_bit(addr, max, start);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_zero_bit_test);
+
+int mb_find_next_bit_test(void *addr, int max, int start)
+{
+ return mb_find_next_bit(addr, max, start);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_bit_test);
+
+void mb_clear_bit_test(int bit, void *addr)
+{
+ mb_clear_bit(bit, addr);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bit_test);
+
+int mb_test_bit_test(int bit, void *addr)
+{
+ return mb_test_bit(bit, addr);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_test_bit_test);
+
+int ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac,
+ handle_t *handle)
+{
+ return ext4_mb_mark_diskspace_used(ac, handle);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_diskspace_used_test);
+
+int mb_mark_used_test(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
+{
+ return mb_mark_used(e4b, ex);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_mark_used_test);
+
+void ext4_mb_generate_buddy_test(struct super_block *sb, void *buddy,
+ void *bitmap, ext4_group_t group,
+ struct ext4_group_info *grp)
+{
+ ext4_mb_generate_buddy(sb, buddy, bitmap, group, grp);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_generate_buddy_test);
+
+int ext4_mb_load_buddy_test(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b)
+{
+ return ext4_mb_load_buddy(sb, group, e4b);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_load_buddy_test);
+
+void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b)
+{
+ ext4_mb_unload_buddy(e4b);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_unload_buddy_test);
+
+void mb_free_blocks_test(struct inode *inode, struct ext4_buddy *e4b,
+ int first, int count)
+{
+ mb_free_blocks(inode, e4b, first, count);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_free_blocks_test);
+
+void ext4_free_blocks_simple_test(struct inode *inode, ext4_fsblk_t block,
+ unsigned long count)
+{
+ return ext4_free_blocks_simple(inode, block, count);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_blocks_simple_test);
+
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_wait_block_bitmap);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_init);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_desc);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_count_free_clusters);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_info);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_group_clusters_set);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_release);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_read_block_bitmap_nowait);
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_set_bits);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_fc_init_inode);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_context);
#endif
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 15a049f05d04..39333ce72cbd 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -270,4 +270,34 @@ ext4_mballoc_query_range(
ext4_mballoc_query_range_fn formatter,
void *priv);
+extern int ext4_mb_mark_context(handle_t *handle,
+ struct super_block *sb, bool state,
+ ext4_group_t group, ext4_grpblk_t blkoff,
+ ext4_grpblk_t len, int flags,
+ ext4_grpblk_t *ret_changed);
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+extern void mb_clear_bits_test(void *bm, int cur, int len);
+extern ext4_fsblk_t
+ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar,
+ int *errp);
+extern int mb_find_next_zero_bit_test(void *addr, int max, int start);
+extern int mb_find_next_bit_test(void *addr, int max, int start);
+extern void mb_clear_bit_test(int bit, void *addr);
+extern int mb_test_bit_test(int bit, void *addr);
+extern int
+ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac,
+ handle_t *handle);
+extern int mb_mark_used_test(struct ext4_buddy *e4b,
+ struct ext4_free_extent *ex);
+extern void ext4_mb_generate_buddy_test(struct super_block *sb,
+ void *buddy, void *bitmap, ext4_group_t group,
+ struct ext4_group_info *grp);
+extern int ext4_mb_load_buddy_test(struct super_block *sb,
+ ext4_group_t group, struct ext4_buddy *e4b);
+extern void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b);
+extern void mb_free_blocks_test(struct inode *inode,
+ struct ext4_buddy *e4b, int first, int count);
+extern void ext4_free_blocks_simple_test(struct inode *inode,
+ ext4_fsblk_t block, unsigned long count);
+#endif
#endif
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index a8c95eee91b7..39fe50b3c662 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -524,9 +524,15 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio,
nr_to_submit++;
} while ((bh = bh->b_this_page) != head);
- /* Nothing to submit? Just unlock the folio... */
- if (!nr_to_submit)
+ if (!nr_to_submit) {
+ /*
+ * We have nothing to submit. Just cycle the folio through
+ * writeback state to properly update xarray tags.
+ */
+ __folio_start_writeback(folio, keep_towrite);
+ folio_end_writeback(folio);
return 0;
+ }
bh = head = folio_buffers(folio);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 43f680c750ae..a34efb44e73d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1254,12 +1254,10 @@ static void ext4_group_desc_free(struct ext4_sb_info *sbi)
struct buffer_head **group_desc;
int i;
- rcu_read_lock();
- group_desc = rcu_dereference(sbi->s_group_desc);
+ group_desc = rcu_access_pointer(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(group_desc[i]);
kvfree(group_desc);
- rcu_read_unlock();
}
static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
@@ -1267,14 +1265,12 @@ static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
struct flex_groups **flex_groups;
int i;
- rcu_read_lock();
- flex_groups = rcu_dereference(sbi->s_flex_groups);
+ flex_groups = rcu_access_pointer(sbi->s_flex_groups);
if (flex_groups) {
for (i = 0; i < sbi->s_flex_groups_allocated; i++)
kvfree(flex_groups[i]);
kvfree(flex_groups);
}
- rcu_read_unlock();
}
static void ext4_put_super(struct super_block *sb)
@@ -1527,6 +1523,27 @@ void ext4_clear_inode(struct inode *inode)
invalidate_inode_buffers(inode);
clear_inode(inode);
ext4_discard_preallocations(inode);
+ /*
+ * We must remove the inode from the hash before ext4_free_inode()
+ * clears the bit in inode bitmap as otherwise another process reusing
+ * the inode will block in insert_inode_hash() waiting for inode
+ * eviction to complete while holding transaction handle open, but
+ * ext4_evict_inode() still running for that inode could block waiting
+ * for transaction commit if the inode is marked as IS_SYNC => deadlock.
+ *
+ * Removing the inode from the hash here is safe. There are two cases
+ * to consider:
+ * 1) The inode still has references to it (i_nlink > 0). In that case
+ * we are keeping the inode and once we remove the inode from the hash,
+ * iget() can create the new inode structure for the same inode number
+ * and we are fine with that as all IO on behalf of the inode is
+ * finished.
+ * 2) We are deleting the inode (i_nlink == 0). In that case inode
+ * number cannot be reused until ext4_free_inode() clears the bit in
+ * the inode bitmap, at which point all IO is done and reuse is fine
+ * again.
+ */
+ remove_inode_hash(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
@@ -3633,6 +3650,13 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly)
"extents feature\n");
return 0;
}
+ if (ext4_has_feature_bigalloc(sb) &&
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+ ext4_msg(sb, KERN_WARNING,
+ "bad geometry: bigalloc file system with non-zero "
+ "first_data_block\n");
+ return 0;
+ }
#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
if (!readonly && (ext4_has_feature_quota(sb) ||
@@ -5403,6 +5427,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
spin_lock_init(&sbi->s_error_lock);
+ mutex_init(&sbi->s_error_notify_mutex);
INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index b87d7bdab06a..923b375e017f 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -597,7 +597,10 @@ static const struct kobj_type ext4_feat_ktype = {
void ext4_notify_error_sysfs(struct ext4_sb_info *sbi)
{
- sysfs_notify(&sbi->s_kobj, NULL, "errors_count");
+ mutex_lock(&sbi->s_error_notify_mutex);
+ if (sbi->s_kobj.state_in_sysfs)
+ sysfs_notify(&sbi->s_kobj, NULL, "errors_count");
+ mutex_unlock(&sbi->s_error_notify_mutex);
}
static struct kobject *ext4_root;
@@ -610,8 +613,10 @@ int ext4_register_sysfs(struct super_block *sb)
int err;
init_completion(&sbi->s_kobj_unregister);
+ mutex_lock(&sbi->s_error_notify_mutex);
err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root,
"%s", sb->s_id);
+ mutex_unlock(&sbi->s_error_notify_mutex);
if (err) {
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
@@ -644,7 +649,10 @@ void ext4_unregister_sysfs(struct super_block *sb)
if (sbi->s_proc)
remove_proc_subtree(sb->s_id, ext4_proc_root);
+
+ mutex_lock(&sbi->s_error_notify_mutex);
kobject_del(&sbi->s_kobj);
+ mutex_unlock(&sbi->s_error_notify_mutex);
}
int __init ext4_init_sysfs(void)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7c75ed7e8979..3c75ee025bda 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1711,6 +1711,19 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
}
}
+static bool __sync_lazytime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ if (!(inode_state_read(inode) & I_DIRTY_TIME)) {
+ spin_unlock(&inode->i_lock);
+ return false;
+ }
+ inode_state_clear(inode, I_DIRTY_TIME);
+ spin_unlock(&inode->i_lock);
+ inode->i_op->sync_lazytime(inode);
+ return true;
+}
+
bool sync_lazytime(struct inode *inode)
{
if (!(inode_state_read_once(inode) & I_DIRTY_TIME))
@@ -1718,9 +1731,8 @@ bool sync_lazytime(struct inode *inode)
trace_writeback_lazytime(inode);
if (inode->i_op->sync_lazytime)
- inode->i_op->sync_lazytime(inode);
- else
- mark_inode_dirty_sync(inode);
+ return __sync_lazytime(inode);
+ mark_inode_dirty_sync(inode);
return true;
}
@@ -2775,13 +2787,8 @@ static void wait_sb_inodes(struct super_block *sb)
* The mapping can appear untagged while still on-list since we
* do not have the mapping lock. Skip it here, wb completion
* will remove it.
- *
- * If the mapping does not have data integrity semantics,
- * there's no need to wait for the writeout to complete, as the
- * mapping cannot guarantee that data is persistently stored.
*/
- if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK) ||
- mapping_no_data_integrity(mapping))
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
continue;
spin_unlock_irq(&sb->s_inode_wblist_lock);
@@ -2916,6 +2923,17 @@ void sync_inodes_sb(struct super_block *sb)
*/
if (bdi == &noop_backing_dev_info)
return;
+
+ /*
+ * If the superblock has SB_I_NO_DATA_INTEGRITY set, there's no need to
+ * wait for the writeout to complete, as the filesystem cannot guarantee
+ * data persistence on sync. Just kick off writeback and return.
+ */
+ if (sb->s_iflags & SB_I_NO_DATA_INTEGRITY) {
+ wakeup_flusher_threads_bdi(bdi, WB_REASON_SYNC);
+ return;
+ }
+
WARN_ON(!rwsem_is_locked(&sb->s_umount));
/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b1bb7153cb78..676fd9856bfb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3201,10 +3201,8 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops;
- if (fc->writeback_cache) {
+ if (fc->writeback_cache)
mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
- mapping_set_no_data_integrity(&inode->i_data);
- }
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e57b8af06be9..c795abe47a4f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1709,6 +1709,7 @@ static void fuse_sb_defaults(struct super_block *sb)
sb->s_export_op = &fuse_export_operations;
sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
sb->s_iflags |= SB_I_NOIDMAP;
+ sb->s_iflags |= SB_I_NO_DATA_INTEGRITY;
if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
index fc045f2e4c45..edd908183058 100644
--- a/fs/iomap/bio.c
+++ b/fs/iomap/bio.c
@@ -8,7 +8,10 @@
#include "internal.h"
#include "trace.h"
-static void iomap_read_end_io(struct bio *bio)
+static DEFINE_SPINLOCK(failed_read_lock);
+static struct bio_list failed_read_list = BIO_EMPTY_LIST;
+
+static void __iomap_read_end_io(struct bio *bio)
{
int error = blk_status_to_errno(bio->bi_status);
struct folio_iter fi;
@@ -18,6 +21,52 @@ static void iomap_read_end_io(struct bio *bio)
bio_put(bio);
}
+static void
+iomap_fail_reads(
+ struct work_struct *work)
+{
+ struct bio *bio;
+ struct bio_list tmp = BIO_EMPTY_LIST;
+ unsigned long flags;
+
+ spin_lock_irqsave(&failed_read_lock, flags);
+ bio_list_merge_init(&tmp, &failed_read_list);
+ spin_unlock_irqrestore(&failed_read_lock, flags);
+
+ while ((bio = bio_list_pop(&tmp)) != NULL) {
+ __iomap_read_end_io(bio);
+ cond_resched();
+ }
+}
+
+static DECLARE_WORK(failed_read_work, iomap_fail_reads);
+
+static void iomap_fail_buffered_read(struct bio *bio)
+{
+ unsigned long flags;
+
+ /*
+ * Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions
+ * in the fserror code. The caller no longer owns the bio reference
+ * after the spinlock drops.
+ */
+ spin_lock_irqsave(&failed_read_lock, flags);
+ if (bio_list_empty(&failed_read_list))
+ WARN_ON_ONCE(!schedule_work(&failed_read_work));
+ bio_list_add(&failed_read_list, bio);
+ spin_unlock_irqrestore(&failed_read_lock, flags);
+}
+
+static void iomap_read_end_io(struct bio *bio)
+{
+ if (bio->bi_status) {
+ iomap_fail_buffered_read(bio);
+ return;
+ }
+
+ __iomap_read_end_io(bio);
+}
+
static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
{
struct bio *bio = ctx->read_ctx;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 00f0efaf12b2..92a831cf4bf1 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -514,6 +514,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
loff_t length = iomap_length(iter);
struct folio *folio = ctx->cur_folio;
size_t folio_len = folio_size(folio);
+ struct iomap_folio_state *ifs;
size_t poff, plen;
loff_t pos_diff;
int ret;
@@ -525,7 +526,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
return iomap_iter_advance(iter, length);
}
- ifs_alloc(iter->inode, folio, iter->flags);
+ ifs = ifs_alloc(iter->inode, folio, iter->flags);
length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos));
while (length) {
@@ -560,11 +561,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
*bytes_submitted += plen;
/*
- * If the entire folio has been read in by the IO
- * helper, then the helper owns the folio and will end
- * the read on it.
+ * Hand off folio ownership to the IO helper when:
+ * 1) The entire folio has been submitted for IO, or
+ * 2) There is no ifs attached to the folio
+ *
+ * Case (2) occurs when 1 << i_blkbits matches the folio
+ * size but the underlying filesystem or block device
+ * uses a smaller granularity for IO.
*/
- if (*bytes_submitted == folio_len)
+ if (*bytes_submitted == folio_len || !ifs)
ctx->cur_folio = NULL;
}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index de89c5bef607..1508e2f54462 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -267,7 +267,15 @@ restart:
*/
BUFFER_TRACE(bh, "queue");
get_bh(bh);
- J_ASSERT_BH(bh, !buffer_jwrite(bh));
+ if (WARN_ON_ONCE(buffer_jwrite(bh))) {
+ put_bh(bh); /* drop the ref we just took */
+ spin_unlock(&journal->j_list_lock);
+ /* Clean up any previously batched buffers */
+ if (batch_count)
+ __flush_batch(journal, &batch_count);
+ jbd2_journal_abort(journal, -EFSCORRUPTED);
+ return -EFSCORRUPTED;
+ }
journal->j_chkpt_bhs[batch_count++] = bh;
transaction->t_chp_stats.cs_written++;
transaction->t_checkpoint_list = jh->b_cpnext;
@@ -325,7 +333,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1;
- J_ASSERT(blocknr != 0);
+ if (WARN_ON_ONCE(blocknr == 0)) {
+ jbd2_journal_abort(journal, -EFSCORRUPTED);
+ return -EFSCORRUPTED;
+ }
/*
* We need to make sure that any blocks that were recently written out
diff --git a/fs/mpage.c b/fs/mpage.c
index 7dae5afc2b9e..b3d9f231a04a 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -646,17 +646,24 @@ out:
}
/**
- * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
+ * __mpage_writepages - walk the list of dirty pages of the given address space
+ * & writepage() all of them
* @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @get_block: the filesystem's block mapper function.
+ * @write_folio: handler to call for each folio before calling
+ * mpage_write_folio()
*
* This is a library function, which implements the writepages()
- * address_space_operation.
+ * address_space_operation. It calls @write_folio handler for each folio. If
+ * the handler returns value > 0, it calls mpage_write_folio() to do the
+ * folio writeback.
*/
int
-mpage_writepages(struct address_space *mapping,
- struct writeback_control *wbc, get_block_t get_block)
+__mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t get_block,
+ int (*write_folio)(struct folio *folio,
+ struct writeback_control *wbc))
{
struct mpage_data mpd = {
.get_block = get_block,
@@ -666,11 +673,22 @@ mpage_writepages(struct address_space *mapping,
int error;
blk_start_plug(&plug);
- while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+ if (write_folio) {
+ error = write_folio(folio, wbc);
+ /*
+ * == 0 means folio is handled, < 0 means error. In
+ * both cases hand back control to writeback_iter()
+ */
+ if (error <= 0)
+ continue;
+ /* Let mpage_write_folio() handle the folio. */
+ }
error = mpage_write_folio(wbc, folio, &mpd);
+ }
if (mpd.bio)
mpage_bio_submit_write(mpd.bio);
blk_finish_plug(&plug);
return error;
}
-EXPORT_SYMBOL(mpage_writepages);
+EXPORT_SYMBOL(__mpage_writepages);
diff --git a/fs/namei.c b/fs/namei.c
index 58f715f7657e..9e5500dad14f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2437,8 +2437,14 @@ inside:
EXPORT_SYMBOL(hashlen_string);
/*
- * Calculate the length and hash of the path component, and
- * return the length as the result.
+ * hash_name - Calculate the length and hash of the path component
+ * @nd: the path resolution state
+ * @name: the pathname to read the component from
+ * @lastword: if the component fits in a single word, LAST_WORD_IS_DOT,
+ * LAST_WORD_IS_DOTDOT, or some other value depending on whether the
+ * component is '.', '..', or something else. Otherwise, @lastword is 0.
+ *
+ * Returns: a pointer to the terminating '/' or NUL character in @name.
*/
static inline const char *hash_name(struct nameidata *nd,
const char *name,
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 88a0d801525f..a8c0d86118c5 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -171,9 +171,8 @@ static void netfs_queue_read(struct netfs_io_request *rreq,
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
- stream->front = subreq;
if (!stream->active) {
- stream->collected_to = stream->front->start;
+ stream->collected_to = subreq->start;
/* Store list pointers before active flag */
smp_store_release(&stream->active, true);
}
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index a498ee8d6674..f72e6da88cca 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -71,9 +71,8 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
- stream->front = subreq;
if (!stream->active) {
- stream->collected_to = stream->front->start;
+ stream->collected_to = subreq->start;
/* Store list pointers before active flag */
smp_store_release(&stream->active, true);
}
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index dd1451bf7543..f9ab69de3e29 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -111,7 +111,6 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq)
netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred);
subreq = stream->construct;
stream->construct = NULL;
- stream->front = NULL;
}
/* Check if (re-)preparation failed. */
@@ -186,10 +185,18 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq)
stream->sreq_max_segs = INT_MAX;
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
- stream->prepare_write(subreq);
- __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- netfs_stat(&netfs_n_wh_retry_write_subreq);
+ if (stream->prepare_write) {
+ stream->prepare_write(subreq);
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ netfs_stat(&netfs_n_wh_retry_write_subreq);
+ } else {
+ struct iov_iter source;
+
+ netfs_reset_iter(subreq);
+ source = subreq->io_iter;
+ netfs_reissue_write(stream, subreq, &source);
+ }
}
netfs_unbuffered_write_done(wreq);
diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c
index 72a435e5fc6d..154a14bb2d7f 100644
--- a/fs/netfs/iterator.c
+++ b/fs/netfs/iterator.c
@@ -143,6 +143,47 @@ static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset,
}
/*
+ * Select the span of a kvec iterator we're going to use. Limit it by both
+ * maximum size and maximum number of segments. Returns the size of the span
+ * in bytes.
+ */
+static size_t netfs_limit_kvec(const struct iov_iter *iter, size_t start_offset,
+ size_t max_size, size_t max_segs)
+{
+ const struct kvec *kvecs = iter->kvec;
+ unsigned int nkv = iter->nr_segs, ix = 0, nsegs = 0;
+ size_t len, span = 0, n = iter->count;
+ size_t skip = iter->iov_offset + start_offset;
+
+ if (WARN_ON(!iov_iter_is_kvec(iter)) ||
+ WARN_ON(start_offset > n) ||
+ n == 0)
+ return 0;
+
+ while (n && ix < nkv && skip) {
+ len = kvecs[ix].iov_len;
+ if (skip < len)
+ break;
+ skip -= len;
+ n -= len;
+ ix++;
+ }
+
+ while (n && ix < nkv) {
+ len = min3(n, kvecs[ix].iov_len - skip, max_size);
+ span += len;
+ nsegs++;
+ ix++;
+ if (span >= max_size || nsegs >= max_segs)
+ break;
+ skip = 0;
+ n -= len;
+ }
+
+ return min(span, max_size);
+}
+
+/*
* Select the span of an xarray iterator we're going to use. Limit it by both
* maximum size and maximum number of segments. It is assumed that segments
* can be larger than a page in size, provided they're physically contiguous.
@@ -245,6 +286,8 @@ size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
return netfs_limit_bvec(iter, start_offset, max_size, max_segs);
if (iov_iter_is_xarray(iter))
return netfs_limit_xarray(iter, start_offset, max_size, max_segs);
+ if (iov_iter_is_kvec(iter))
+ return netfs_limit_kvec(iter, start_offset, max_size, max_segs);
BUG();
}
EXPORT_SYMBOL(netfs_limit_iter);
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 137f0e28a44c..e5f6665b3341 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -205,7 +205,8 @@ reassess:
* in progress. The issuer thread may be adding stuff to the tail
* whilst we're doing this.
*/
- front = READ_ONCE(stream->front);
+ front = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
while (front) {
size_t transferred;
@@ -301,7 +302,6 @@ reassess:
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
- stream->front = front;
spin_unlock(&rreq->lock);
netfs_put_subrequest(remove,
notes & ABANDON_SREQ ?
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 7793ba5e3e8f..cca9ac43c077 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -93,8 +93,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
from->start, from->transferred, from->len);
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
- !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
+ !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) {
+ subreq = from;
goto abandon;
+ }
list_for_each_continue(next, &stream->subrequests) {
subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
@@ -178,6 +180,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
if (subreq == to)
break;
}
+ subreq = NULL;
continue;
}
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
index 8e6264f62a8f..d0e23bc42445 100644
--- a/fs/netfs/read_single.c
+++ b/fs/netfs/read_single.c
@@ -107,7 +107,6 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
- stream->front = subreq;
/* Store list pointers before active flag */
smp_store_release(&stream->active, true);
spin_unlock(&rreq->lock);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 83eb3dc1adf8..b194447f4b11 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -228,7 +228,8 @@ reassess_streams:
if (!smp_load_acquire(&stream->active))
continue;
- front = stream->front;
+ front = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
while (front) {
trace_netfs_collect_sreq(wreq, front);
//_debug("sreq [%x] %llx %zx/%zx",
@@ -279,7 +280,6 @@ reassess_streams:
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
- stream->front = front;
spin_unlock(&wreq->lock);
netfs_put_subrequest(remove,
notes & SAW_FAILURE ?
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 437268f65640..2db688f94125 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -206,9 +206,8 @@ void netfs_prepare_write(struct netfs_io_request *wreq,
spin_lock(&wreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
- stream->front = subreq;
if (!stream->active) {
- stream->collected_to = stream->front->start;
+ stream->collected_to = subreq->start;
/* Write list pointers before active flag */
smp_store_release(&stream->active, true);
}
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 758611ee4475..13cb60b52bd6 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -1146,15 +1146,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return -EOVERFLOW;
/*
- * With metacopy disabled, we fsync after final metadata copyup, for
+ * With "fsync=strict", we fsync after final metadata copyup, for
* both regular files and directories to get atomic copyup semantics
* on filesystems that do not use strict metadata ordering (e.g. ubifs).
*
- * With metacopy enabled we want to avoid fsync on all meta copyup
+ * By default, we want to avoid fsync on all meta copyup, because
* that will hurt performance of workloads such as chown -R, so we
* only fsync on data copyup as legacy behavior.
*/
- ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy &&
+ ctx.metadata_fsync = ovl_should_sync_metadata(OVL_FS(dentry->d_sb)) &&
(S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode));
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index cad2055ebf18..63b299bf12f7 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -99,6 +99,12 @@ enum {
OVL_VERITY_REQUIRE,
};
+enum {
+ OVL_FSYNC_VOLATILE,
+ OVL_FSYNC_AUTO,
+ OVL_FSYNC_STRICT,
+};
+
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
* where:
@@ -656,6 +662,21 @@ static inline bool ovl_xino_warn(struct ovl_fs *ofs)
return ofs->config.xino == OVL_XINO_ON;
}
+static inline bool ovl_should_sync(struct ovl_fs *ofs)
+{
+ return ofs->config.fsync_mode != OVL_FSYNC_VOLATILE;
+}
+
+static inline bool ovl_should_sync_metadata(struct ovl_fs *ofs)
+{
+ return ofs->config.fsync_mode == OVL_FSYNC_STRICT;
+}
+
+static inline bool ovl_is_volatile(struct ovl_config *config)
+{
+ return config->fsync_mode == OVL_FSYNC_VOLATILE;
+}
+
/*
* To avoid regressions in existing setups with overlay lower offline changes,
* we allow lower changes only if none of the new features are used.
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 1d4828dbcf7a..80cad4ea96a3 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -18,7 +18,7 @@ struct ovl_config {
int xino;
bool metacopy;
bool userxattr;
- bool ovl_volatile;
+ int fsync_mode;
};
struct ovl_sb {
@@ -120,11 +120,6 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb)
return (struct ovl_fs *)sb->s_fs_info;
}
-static inline bool ovl_should_sync(struct ovl_fs *ofs)
-{
- return !ofs->config.ovl_volatile;
-}
-
static inline unsigned int ovl_numlower(struct ovl_entry *oe)
{
return oe ? oe->__numlower : 0;
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index 8111b437ae5d..c93fcaa45d4a 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -58,6 +58,7 @@ enum ovl_opt {
Opt_xino,
Opt_metacopy,
Opt_verity,
+ Opt_fsync,
Opt_volatile,
Opt_override_creds,
};
@@ -140,6 +141,23 @@ static int ovl_verity_mode_def(void)
return OVL_VERITY_OFF;
}
+static const struct constant_table ovl_parameter_fsync[] = {
+ { "volatile", OVL_FSYNC_VOLATILE },
+ { "auto", OVL_FSYNC_AUTO },
+ { "strict", OVL_FSYNC_STRICT },
+ {}
+};
+
+static const char *ovl_fsync_mode(struct ovl_config *config)
+{
+ return ovl_parameter_fsync[config->fsync_mode].name;
+}
+
+static int ovl_fsync_mode_def(void)
+{
+ return OVL_FSYNC_AUTO;
+}
+
const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_string_empty("lowerdir", Opt_lowerdir),
fsparam_file_or_string("lowerdir+", Opt_lowerdir_add),
@@ -155,6 +173,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_enum("xino", Opt_xino, ovl_parameter_xino),
fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool),
fsparam_enum("verity", Opt_verity, ovl_parameter_verity),
+ fsparam_enum("fsync", Opt_fsync, ovl_parameter_fsync),
fsparam_flag("volatile", Opt_volatile),
fsparam_flag_no("override_creds", Opt_override_creds),
{}
@@ -665,8 +684,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_verity:
config->verity_mode = result.uint_32;
break;
+ case Opt_fsync:
+ config->fsync_mode = result.uint_32;
+ break;
case Opt_volatile:
- config->ovl_volatile = true;
+ config->fsync_mode = OVL_FSYNC_VOLATILE;
break;
case Opt_userxattr:
config->userxattr = true;
@@ -800,6 +822,7 @@ int ovl_init_fs_context(struct fs_context *fc)
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
ofs->config.metacopy = ovl_metacopy_def;
+ ofs->config.fsync_mode = ovl_fsync_mode_def();
fc->s_fs_info = ofs;
fc->fs_private = ctx;
@@ -870,9 +893,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
config->index = false;
}
- if (!config->upperdir && config->ovl_volatile) {
+ if (!config->upperdir && ovl_is_volatile(config)) {
pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
- config->ovl_volatile = false;
+ config->fsync_mode = ovl_fsync_mode_def();
}
if (!config->upperdir && config->uuid == OVL_UUID_ON) {
@@ -1070,8 +1093,8 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
if (ofs->config.metacopy != ovl_metacopy_def)
seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy));
- if (ofs->config.ovl_volatile)
- seq_puts(m, ",volatile");
+ if (ofs->config.fsync_mode != ovl_fsync_mode_def())
+ seq_printf(m, ",fsync=%s", ovl_fsync_mode(&ofs->config));
if (ofs->config.userxattr)
seq_puts(m, ",userxattr");
if (ofs->config.verity_mode != ovl_verity_mode_def())
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index d4c12feec039..0822987cfb51 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -776,7 +776,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
* For volatile mount, create a incompat/volatile/dirty file to keep
* track of it.
*/
- if (ofs->config.ovl_volatile) {
+ if (ovl_is_volatile(&ofs->config)) {
err = ovl_create_volatile_dirty(ofs);
if (err < 0) {
pr_err("Failed to create volatile/dirty file.\n");
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 3f1b763a8bb4..2ea769f311c3 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -85,7 +85,10 @@ int ovl_can_decode_fh(struct super_block *sb)
if (!exportfs_can_decode_fh(sb->s_export_op))
return 0;
- return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
+ if (sb->s_export_op->encode_fh == generic_encode_ino32_fh)
+ return FILEID_INO32_GEN;
+
+ return -1;
}
struct dentry *ovl_indexdir(struct super_block *sb)
diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile
index 26b6105f04d1..1a6e1e1c9764 100644
--- a/fs/smb/client/Makefile
+++ b/fs/smb/client/Makefile
@@ -48,8 +48,8 @@ cifs-$(CONFIG_CIFS_COMPRESSION) += compress.o compress/lz77.o
# Build the SMB2 error mapping table from smb2status.h
#
$(obj)/smb2_mapping_table.c: $(src)/../common/smb2status.h \
- $(src)/gen_smb2_mapping
- $(call cmd,gen_smb2_mapping)
+ $(src)/gen_smb2_mapping FORCE
+ $(call if_changed,gen_smb2_mapping)
$(obj)/smb2maperror.o: $(obj)/smb2_mapping_table.c
@@ -58,4 +58,5 @@ quiet_cmd_gen_smb2_mapping = GEN $@
obj-$(CONFIG_SMB_KUNIT_TESTS) += smb2maperror_test.o
-clean-files += smb2_mapping_table.c
+# Let Kbuild handle tracking and cleaning
+targets += smb2_mapping_table.c
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index a4a7c7eee038..a46764c24710 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -588,6 +588,10 @@ char *cifs_sanitize_prepath(char *prepath, gfp_t gfp)
while (IS_DELIM(*cursor1))
cursor1++;
+ /* exit in case of only delimiters */
+ if (!*cursor1)
+ return NULL;
+
/* copy the first letter */
*cursor2 = *cursor1;
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 393a4ae47cc1..9b2bb8764a80 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -82,11 +82,19 @@ static void lease_del_list(struct oplock_info *opinfo)
spin_unlock(&lb->lb_lock);
}
-static void lb_add(struct lease_table *lb)
+static struct lease_table *alloc_lease_table(struct oplock_info *opinfo)
{
- write_lock(&lease_list_lock);
- list_add(&lb->l_entry, &lease_table_list);
- write_unlock(&lease_list_lock);
+ struct lease_table *lb;
+
+ lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP);
+ if (!lb)
+ return NULL;
+
+ memcpy(lb->client_guid, opinfo->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE);
+ INIT_LIST_HEAD(&lb->lease_list);
+ spin_lock_init(&lb->lb_lock);
+ return lb;
}
static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx)
@@ -1042,34 +1050,27 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2)
lease2->version = lease1->version;
}
-static int add_lease_global_list(struct oplock_info *opinfo)
+static void add_lease_global_list(struct oplock_info *opinfo,
+ struct lease_table *new_lb)
{
struct lease_table *lb;
- read_lock(&lease_list_lock);
+ write_lock(&lease_list_lock);
list_for_each_entry(lb, &lease_table_list, l_entry) {
if (!memcmp(lb->client_guid, opinfo->conn->ClientGUID,
SMB2_CLIENT_GUID_SIZE)) {
opinfo->o_lease->l_lb = lb;
lease_add_list(opinfo);
- read_unlock(&lease_list_lock);
- return 0;
+ write_unlock(&lease_list_lock);
+ kfree(new_lb);
+ return;
}
}
- read_unlock(&lease_list_lock);
- lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP);
- if (!lb)
- return -ENOMEM;
-
- memcpy(lb->client_guid, opinfo->conn->ClientGUID,
- SMB2_CLIENT_GUID_SIZE);
- INIT_LIST_HEAD(&lb->lease_list);
- spin_lock_init(&lb->lb_lock);
- opinfo->o_lease->l_lb = lb;
+ opinfo->o_lease->l_lb = new_lb;
lease_add_list(opinfo);
- lb_add(lb);
- return 0;
+ list_add(&new_lb->l_entry, &lease_table_list);
+ write_unlock(&lease_list_lock);
}
static void set_oplock_level(struct oplock_info *opinfo, int level,
@@ -1189,6 +1190,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
int err = 0;
struct oplock_info *opinfo = NULL, *prev_opinfo = NULL;
struct ksmbd_inode *ci = fp->f_ci;
+ struct lease_table *new_lb = NULL;
bool prev_op_has_lease;
__le32 prev_op_state = 0;
@@ -1291,21 +1293,37 @@ set_lev:
set_oplock_level(opinfo, req_op_level, lctx);
out:
- opinfo_count_inc(fp);
- opinfo_add(opinfo, fp);
-
+ /*
+ * Set o_fp before any publication so that concurrent readers
+ * (e.g. find_same_lease_key() on the lease list) that
+ * dereference opinfo->o_fp don't hit a NULL pointer.
+ *
+ * Keep the original publication order so concurrent opens can
+ * still observe the in-flight grant via ci->m_op_list, but make
+ * everything after opinfo_add() no-fail by preallocating any new
+ * lease_table first.
+ */
+ opinfo->o_fp = fp;
if (opinfo->is_lease) {
- err = add_lease_global_list(opinfo);
- if (err)
+ new_lb = alloc_lease_table(opinfo);
+ if (!new_lb) {
+ err = -ENOMEM;
goto err_out;
+ }
}
+ opinfo_count_inc(fp);
+ opinfo_add(opinfo, fp);
+
+ if (opinfo->is_lease)
+ add_lease_global_list(opinfo, new_lb);
+
rcu_assign_pointer(fp->f_opinfo, opinfo);
- opinfo->o_fp = fp;
return 0;
err_out:
- __free_opinfo(opinfo);
+ kfree(new_lb);
+ opinfo_put(opinfo);
return err;
}
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 9c44e71e3c3b..8e4cfdc0ba02 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1939,8 +1939,14 @@ out_err:
if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION)
try_delay = true;
- sess->last_active = jiffies;
- sess->state = SMB2_SESSION_EXPIRED;
+ /*
+ * For binding requests, session belongs to another
+ * connection. Do not expire it.
+ */
+ if (!(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+ sess->last_active = jiffies;
+ sess->state = SMB2_SESSION_EXPIRED;
+ }
ksmbd_user_session_put(sess);
work->sess = NULL;
if (try_delay) {
@@ -3396,20 +3402,24 @@ int smb2_open(struct ksmbd_work *work)
KSMBD_SHARE_FLAG_ACL_XATTR)) {
struct smb_fattr fattr;
struct smb_ntsd *pntsd;
- int pntsd_size, ace_num = 0;
+ int pntsd_size;
+ size_t scratch_len;
ksmbd_acls_fattr(&fattr, idmap, inode);
- if (fattr.cf_acls)
- ace_num = fattr.cf_acls->a_count;
- if (fattr.cf_dacls)
- ace_num += fattr.cf_dacls->a_count;
-
- pntsd = kmalloc(sizeof(struct smb_ntsd) +
- sizeof(struct smb_sid) * 3 +
- sizeof(struct smb_acl) +
- sizeof(struct smb_ace) * ace_num * 2,
- KSMBD_DEFAULT_GFP);
+ scratch_len = smb_acl_sec_desc_scratch_len(&fattr,
+ NULL, 0,
+ OWNER_SECINFO | GROUP_SECINFO |
+ DACL_SECINFO);
+ if (!scratch_len || scratch_len == SIZE_MAX) {
+ rc = -EFBIG;
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
+ goto err_out;
+ }
+
+ pntsd = kvzalloc(scratch_len, KSMBD_DEFAULT_GFP);
if (!pntsd) {
+ rc = -ENOMEM;
posix_acl_release(fattr.cf_acls);
posix_acl_release(fattr.cf_dacls);
goto err_out;
@@ -3424,7 +3434,7 @@ int smb2_open(struct ksmbd_work *work)
posix_acl_release(fattr.cf_acls);
posix_acl_release(fattr.cf_dacls);
if (rc) {
- kfree(pntsd);
+ kvfree(pntsd);
goto err_out;
}
@@ -3434,7 +3444,7 @@ int smb2_open(struct ksmbd_work *work)
pntsd,
pntsd_size,
false);
- kfree(pntsd);
+ kvfree(pntsd);
if (rc)
pr_err("failed to store ntacl in xattr : %d\n",
rc);
@@ -4446,8 +4456,9 @@ int smb2_query_dir(struct ksmbd_work *work)
d_info.wptr = (char *)rsp->Buffer;
d_info.rptr = (char *)rsp->Buffer;
d_info.out_buf_len =
- smb2_calc_max_out_buf_len(work, 8,
- le32_to_cpu(req->OutputBufferLength));
+ smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_directory_rsp, Buffer),
+ le32_to_cpu(req->OutputBufferLength));
if (d_info.out_buf_len < 0) {
rc = -EINVAL;
goto err_out;
@@ -4714,8 +4725,9 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
}
buf_free_len =
- smb2_calc_max_out_buf_len(work, 8,
- le32_to_cpu(req->OutputBufferLength));
+ smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_info_rsp, Buffer),
+ le32_to_cpu(req->OutputBufferLength));
if (buf_free_len < 0)
return -EINVAL;
@@ -4932,7 +4944,8 @@ static int get_file_all_info(struct ksmbd_work *work,
int conv_len;
char *filename;
u64 time;
- int ret;
+ int ret, buf_free_len, filename_len;
+ struct smb2_query_info_req *req = ksmbd_req_buf_next(work);
if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n",
@@ -4944,6 +4957,16 @@ static int get_file_all_info(struct ksmbd_work *work,
if (IS_ERR(filename))
return PTR_ERR(filename);
+ filename_len = strlen(filename);
+ buf_free_len = smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_info_rsp, Buffer) +
+ offsetof(struct smb2_file_all_info, FileName),
+ le32_to_cpu(req->OutputBufferLength));
+ if (buf_free_len < (filename_len + 1) * 2) {
+ kfree(filename);
+ return -EINVAL;
+ }
+
ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
AT_STATX_SYNC_AS_STAT);
if (ret) {
@@ -4987,7 +5010,8 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->Mode = fp->coption;
file_info->AlignmentRequirement = 0;
conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename,
- PATH_MAX, conn->local_nls, 0);
+ min(filename_len, PATH_MAX),
+ conn->local_nls, 0);
conv_len *= 2;
file_info->FileNameLength = cpu_to_le32(conv_len);
rsp->OutputBufferLength =
@@ -5041,8 +5065,9 @@ static int get_file_stream_info(struct ksmbd_work *work,
file_info = (struct smb2_file_stream_info *)rsp->Buffer;
buf_free_len =
- smb2_calc_max_out_buf_len(work, 8,
- le32_to_cpu(req->OutputBufferLength));
+ smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_info_rsp, Buffer),
+ le32_to_cpu(req->OutputBufferLength));
if (buf_free_len < 0)
goto out;
@@ -5351,8 +5376,9 @@ static int smb2_get_info_file(struct ksmbd_work *work,
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_PIPE)) {
/* smb2 info file called for pipe */
- return smb2_get_info_file_pipe(work->sess, req, rsp,
+ rc = smb2_get_info_file_pipe(work->sess, req, rsp,
work->response_buf);
+ goto iov_pin_out;
}
if (work->next_smb2_rcv_hdr_off) {
@@ -5452,6 +5478,12 @@ static int smb2_get_info_file(struct ksmbd_work *work,
rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
rsp, work->response_buf);
ksmbd_fd_put(work, fp);
+
+iov_pin_out:
+ if (!rc)
+ rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ offsetof(struct smb2_query_info_rsp, Buffer) +
+ le32_to_cpu(rsp->OutputBufferLength));
return rc;
}
@@ -5678,6 +5710,11 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
rsp, work->response_buf);
path_put(&path);
+
+ if (!rc)
+ rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ offsetof(struct smb2_query_info_rsp, Buffer) +
+ le32_to_cpu(rsp->OutputBufferLength));
return rc;
}
@@ -5687,13 +5724,14 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
{
struct ksmbd_file *fp;
struct mnt_idmap *idmap;
- struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL;
+ struct smb_ntsd *pntsd = NULL, *ppntsd = NULL;
struct smb_fattr fattr = {{0}};
struct inode *inode;
__u32 secdesclen = 0;
unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
int addition_info = le32_to_cpu(req->AdditionalInformation);
- int rc = 0, ppntsd_size = 0;
+ int rc = 0, ppntsd_size = 0, max_len;
+ size_t scratch_len = 0;
if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
PROTECTED_DACL_SECINFO |
@@ -5701,6 +5739,11 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
ksmbd_debug(SMB, "Unsupported addition info: 0x%x)\n",
addition_info);
+ pntsd = kzalloc(ALIGN(sizeof(struct smb_ntsd), 8),
+ KSMBD_DEFAULT_GFP);
+ if (!pntsd)
+ return -ENOMEM;
+
pntsd->revision = cpu_to_le16(1);
pntsd->type = cpu_to_le16(SELF_RELATIVE | DACL_PROTECTED);
pntsd->osidoffset = 0;
@@ -5709,9 +5752,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
pntsd->dacloffset = 0;
secdesclen = sizeof(struct smb_ntsd);
- rsp->OutputBufferLength = cpu_to_le32(secdesclen);
-
- return 0;
+ goto iov_pin;
}
if (work->next_smb2_rcv_hdr_off) {
@@ -5743,18 +5784,58 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
&ppntsd);
/* Check if sd buffer size exceeds response buffer size */
- if (smb2_resp_buf_len(work, 8) > ppntsd_size)
- rc = build_sec_desc(idmap, pntsd, ppntsd, ppntsd_size,
- addition_info, &secdesclen, &fattr);
+ max_len = smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_info_rsp, Buffer),
+ le32_to_cpu(req->OutputBufferLength));
+ if (max_len < 0) {
+ rc = -EINVAL;
+ goto release_acl;
+ }
+
+ scratch_len = smb_acl_sec_desc_scratch_len(&fattr, ppntsd,
+ ppntsd_size, addition_info);
+ if (!scratch_len || scratch_len == SIZE_MAX) {
+ rc = -EFBIG;
+ goto release_acl;
+ }
+
+ pntsd = kvzalloc(scratch_len, KSMBD_DEFAULT_GFP);
+ if (!pntsd) {
+ rc = -ENOMEM;
+ goto release_acl;
+ }
+
+ rc = build_sec_desc(idmap, pntsd, ppntsd, ppntsd_size,
+ addition_info, &secdesclen, &fattr);
+
+release_acl:
posix_acl_release(fattr.cf_acls);
posix_acl_release(fattr.cf_dacls);
kfree(ppntsd);
ksmbd_fd_put(work, fp);
+
+ if (!rc && ALIGN(secdesclen, 8) > scratch_len)
+ rc = -EFBIG;
if (rc)
- return rc;
+ goto err_out;
+iov_pin:
rsp->OutputBufferLength = cpu_to_le32(secdesclen);
- return 0;
+ rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
+ rsp, work->response_buf);
+ if (rc)
+ goto err_out;
+
+ rc = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
+ offsetof(struct smb2_query_info_rsp, Buffer),
+ pntsd, secdesclen);
+err_out:
+ if (rc) {
+ rsp->OutputBufferLength = 0;
+ kvfree(pntsd);
+ }
+
+ return rc;
}
/**
@@ -5778,6 +5859,9 @@ int smb2_query_info(struct ksmbd_work *work)
goto err_out;
}
+ rsp->StructureSize = cpu_to_le16(9);
+ rsp->OutputBufferOffset = cpu_to_le16(72);
+
switch (req->InfoType) {
case SMB2_O_INFO_FILE:
ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
@@ -5798,14 +5882,6 @@ int smb2_query_info(struct ksmbd_work *work)
}
ksmbd_revert_fsids(work);
- if (!rc) {
- rsp->StructureSize = cpu_to_le16(9);
- rsp->OutputBufferOffset = cpu_to_le16(72);
- rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
- offsetof(struct smb2_query_info_rsp, Buffer) +
- le32_to_cpu(rsp->OutputBufferLength));
- }
-
err_out:
if (rc < 0) {
if (rc == -EACCES)
@@ -5816,6 +5892,8 @@ err_out:
rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
else if (rc == -ENOMEM)
rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+ else if (rc == -EINVAL && rsp->hdr.Status == 0)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
else if (rc == -EOPNOTSUPP || rsp->hdr.Status == 0)
rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
smb2_set_err_rsp(work);
@@ -7586,14 +7664,15 @@ retry:
rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL);
skip:
if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) {
+ locks_free_lock(flock);
+ kfree(smb_lock);
if (!rc) {
ksmbd_debug(SMB, "File unlocked\n");
} else if (rc == -ENOENT) {
rsp->hdr.Status = STATUS_NOT_LOCKED;
+ err = rc;
goto out;
}
- locks_free_lock(flock);
- kfree(smb_lock);
} else {
if (rc == FILE_LOCK_DEFERRED) {
void **argv;
@@ -7662,6 +7741,9 @@ skip:
spin_unlock(&work->conn->llist_lock);
ksmbd_debug(SMB, "successful in taking lock\n");
} else {
+ locks_free_lock(flock);
+ kfree(smb_lock);
+ err = rc;
goto out;
}
}
@@ -7692,13 +7774,17 @@ out:
struct file_lock *rlock = NULL;
rlock = smb_flock_init(filp);
- rlock->c.flc_type = F_UNLCK;
- rlock->fl_start = smb_lock->start;
- rlock->fl_end = smb_lock->end;
+ if (rlock) {
+ rlock->c.flc_type = F_UNLCK;
+ rlock->fl_start = smb_lock->start;
+ rlock->fl_end = smb_lock->end;
- rc = vfs_lock_file(filp, F_SETLK, rlock, NULL);
- if (rc)
- pr_err("rollback unlock fail : %d\n", rc);
+ rc = vfs_lock_file(filp, F_SETLK, rlock, NULL);
+ if (rc)
+ pr_err("rollback unlock fail : %d\n", rc);
+ } else {
+ pr_err("rollback unlock alloc failed\n");
+ }
list_del(&smb_lock->llist);
spin_lock(&work->conn->llist_lock);
@@ -7708,7 +7794,8 @@ out:
spin_unlock(&work->conn->llist_lock);
locks_free_lock(smb_lock->fl);
- locks_free_lock(rlock);
+ if (rlock)
+ locks_free_lock(rlock);
kfree(smb_lock);
}
out2:
@@ -8191,8 +8278,9 @@ int smb2_ioctl(struct ksmbd_work *work)
buffer = (char *)req + le32_to_cpu(req->InputOffset);
cnt_code = le32_to_cpu(req->CtlCode);
- ret = smb2_calc_max_out_buf_len(work, 48,
- le32_to_cpu(req->MaxOutputResponse));
+ ret = smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_ioctl_rsp, Buffer),
+ le32_to_cpu(req->MaxOutputResponse));
if (ret < 0) {
rsp->hdr.Status = STATUS_INVALID_PARAMETER;
goto out;
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c
index 49c2abb29bf5..c30d01877c41 100644
--- a/fs/smb/server/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -915,6 +915,49 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
return 0;
}
+size_t smb_acl_sec_desc_scratch_len(struct smb_fattr *fattr,
+ struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info)
+{
+ size_t len = sizeof(struct smb_ntsd);
+ size_t tmp;
+
+ if (addition_info & OWNER_SECINFO)
+ len += sizeof(struct smb_sid);
+ if (addition_info & GROUP_SECINFO)
+ len += sizeof(struct smb_sid);
+ if (!(addition_info & DACL_SECINFO))
+ return len;
+
+ len += sizeof(struct smb_acl);
+ if (ppntsd && ppntsd_size > 0) {
+ unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset);
+
+ if (dacl_offset < ppntsd_size &&
+ check_add_overflow(len, ppntsd_size - dacl_offset, &len))
+ return 0;
+ }
+
+ if (fattr->cf_acls) {
+ if (check_mul_overflow((size_t)fattr->cf_acls->a_count,
+ 2 * sizeof(struct smb_ace), &tmp) ||
+ check_add_overflow(len, tmp, &len))
+ return 0;
+ } else {
+ /* default/minimum DACL */
+ if (check_add_overflow(len, 5 * sizeof(struct smb_ace), &len))
+ return 0;
+ }
+
+ if (fattr->cf_dacls) {
+ if (check_mul_overflow((size_t)fattr->cf_dacls->a_count,
+ sizeof(struct smb_ace), &tmp) ||
+ check_add_overflow(len, tmp, &len))
+ return 0;
+ }
+
+ return len;
+}
+
/* Convert permission bits from mode to equivalent CIFS ACL */
int build_sec_desc(struct mnt_idmap *idmap,
struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h
index 355adaee39b8..ab21ba2cd4df 100644
--- a/fs/smb/server/smbacl.h
+++ b/fs/smb/server/smbacl.h
@@ -101,6 +101,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
bool type_check, bool get_write);
void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
void ksmbd_init_domain(u32 *sub_auth);
+size_t smb_acl_sec_desc_scratch_len(struct smb_fattr *fattr,
+ struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info);
static inline uid_t posix_acl_uid_translate(struct mnt_idmap *idmap,
struct posix_acl_entry *pace)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7fae8002344a..23e894092dab 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -181,22 +181,23 @@ static void udf_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int udf_adinicb_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+static int udf_handle_page_wb(struct folio *folio,
+ struct writeback_control *wbc)
{
- struct inode *inode = mapping->host;
+ struct inode *inode = folio->mapping->host;
struct udf_inode_info *iinfo = UDF_I(inode);
- struct folio *folio = NULL;
- int error = 0;
- while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
- BUG_ON(!folio_test_locked(folio));
- BUG_ON(folio->index != 0);
- memcpy_from_file_folio(iinfo->i_data + iinfo->i_lenEAttr, folio,
- 0, i_size_read(inode));
- folio_unlock(folio);
- }
+ /*
+ * Inodes in the normal format are handled by the generic code. This
+ * check is race-free as the folio lock protects us from inode type
+ * conversion.
+ */
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB)
+ return 1;
+ memcpy_from_file_folio(iinfo->i_data + iinfo->i_lenEAttr, folio,
+ 0, i_size_read(inode));
+ folio_unlock(folio);
mark_inode_dirty(inode);
return 0;
}
@@ -204,12 +205,8 @@ static int udf_adinicb_writepages(struct address_space *mapping,
static int udf_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct inode *inode = mapping->host;
- struct udf_inode_info *iinfo = UDF_I(inode);
-
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- return udf_adinicb_writepages(mapping, wbc);
- return mpage_writepages(mapping, wbc, udf_get_block_wb);
+ return __mpage_writepages(mapping, wbc, udf_get_block_wb,
+ udf_handle_page_wb);
}
static void udf_adinicb_read_folio(struct folio *folio)
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 8244305949de..67fd9c75ac3f 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -55,7 +55,8 @@ struct xfs_attr_list_context {
struct xfs_trans *tp;
struct xfs_inode *dp; /* inode */
struct xfs_attrlist_cursor_kern cursor; /* position in list */
- void *buffer; /* output buffer */
+ /* output buffer */
+ void *buffer __counted_by_ptr(bufsize);
/*
* Abort attribute list iteration if non-zero. Can be used to pass
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 47f48ae555c0..2b78041e8672 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1416,6 +1416,28 @@ xfs_attr3_leaf_create(
}
/*
+ * Reinitialize an existing attr fork block as an empty leaf, and attach
+ * the buffer to tp.
+ */
+int
+xfs_attr3_leaf_init(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ xfs_dablk_t blkno)
+{
+ struct xfs_buf *bp = NULL;
+ struct xfs_da_args args = {
+ .trans = tp,
+ .dp = dp,
+ .owner = dp->i_ino,
+ .geo = dp->i_mount->m_attr_geo,
+ };
+
+ ASSERT(tp != NULL);
+
+ return xfs_attr3_leaf_create(&args, blkno, &bp);
+}
+/*
* Split the leaf node, rebalance, then add the new entry.
*
* Returns 0 if the entry was added, 1 if a further split is needed or a
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index aca46da2bc50..72639efe6ac3 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -87,6 +87,9 @@ int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
/*
* Routines used for shrinking the Btree.
*/
+
+int xfs_attr3_leaf_init(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t blkno);
int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval);
void xfs_attr3_leaf_unbalance(struct xfs_da_state *state,
struct xfs_da_state_blk *drop_blk,
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 09d4c17b3e7b..ad801b7bd2dd 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1506,21 +1506,20 @@ xfs_da3_fixhashpath(
}
/*
- * Remove an entry from an intermediate node.
+ * Internal implementation to remove an entry from an intermediate node.
*/
STATIC void
-xfs_da3_node_remove(
- struct xfs_da_state *state,
- struct xfs_da_state_blk *drop_blk)
+__xfs_da3_node_remove(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_da_geometry *geo,
+ struct xfs_da_state_blk *drop_blk)
{
struct xfs_da_intnode *node;
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_da_node_entry *btree;
int index;
int tmp;
- struct xfs_inode *dp = state->args->dp;
-
- trace_xfs_da_node_remove(state->args);
node = drop_blk->bp->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
@@ -1536,17 +1535,17 @@ xfs_da3_node_remove(
tmp = nodehdr.count - index - 1;
tmp *= (uint)sizeof(xfs_da_node_entry_t);
memmove(&btree[index], &btree[index + 1], tmp);
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(tp, drop_blk->bp,
XFS_DA_LOGRANGE(node, &btree[index], tmp));
index = nodehdr.count - 1;
}
memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(tp, drop_blk->bp,
XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
nodehdr.count -= 1;
xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
- XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));
+ xfs_trans_log_buf(tp, drop_blk->bp,
+ XFS_DA_LOGRANGE(node, &node->hdr, geo->node_hdr_size));
/*
* Copy the last hash value from the block to propagate upwards.
@@ -1555,6 +1554,38 @@ xfs_da3_node_remove(
}
/*
+ * Remove an entry from an intermediate node.
+ */
+STATIC void
+xfs_da3_node_remove(
+ struct xfs_da_state *state,
+ struct xfs_da_state_blk *drop_blk)
+{
+ trace_xfs_da_node_remove(state->args);
+
+ __xfs_da3_node_remove(state->args->trans, state->args->dp,
+ state->args->geo, drop_blk);
+}
+
+/*
+ * Remove an entry from an intermediate attr node at the specified index.
+ */
+void
+xfs_attr3_node_entry_remove(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_buf *bp,
+ int index)
+{
+ struct xfs_da_state_blk blk = {
+ .index = index,
+ .bp = bp,
+ };
+
+ __xfs_da3_node_remove(tp, dp, dp->i_mount->m_attr_geo, &blk);
+}
+
+/*
* Unbalance the elements between two intermediate nodes,
* move all Btree elements from one node into another.
*/
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 354d5d65043e..afcf2d3c7a21 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -184,6 +184,8 @@ int xfs_da3_split(xfs_da_state_t *state);
int xfs_da3_join(xfs_da_state_t *state);
void xfs_da3_fixhashpath(struct xfs_da_state *state,
struct xfs_da_state_path *path_to_to_fix);
+void xfs_attr3_node_entry_remove(struct xfs_trans *tp, struct xfs_inode *dp,
+ struct xfs_buf *bp, int index);
/*
* Routines used for finding things in the Btree.
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 1d25bd5b892e..222812fe202c 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -171,8 +171,10 @@ xchk_quota_item(
error = xchk_quota_item_bmap(sc, dq, offset);
xchk_iunlock(sc, XFS_ILOCK_SHARED);
- if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
+ if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) {
+ mutex_unlock(&dq->q_qlock);
return error;
+ }
/*
* Warn if the hard limits are larger than the fs.
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 39ea651cbb75..286c5f5e0544 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -972,20 +972,12 @@ TRACE_EVENT(xfile_create,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
- __array(char, pathname, MAXNAMELEN)
),
TP_fast_assign(
- char *path;
-
__entry->ino = file_inode(xf->file)->i_ino;
- path = file_path(xf->file, __entry->pathname, MAXNAMELEN);
- if (IS_ERR(path))
- strncpy(__entry->pathname, "(unknown)",
- sizeof(__entry->pathname));
),
- TP_printk("xfino 0x%lx path '%s'",
- __entry->ino,
- __entry->pathname)
+ TP_printk("xfino 0x%lx",
+ __entry->ino)
);
TRACE_EVENT(xfile_destroy,
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 92331991f9fd..a5b69c0fbfd0 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -140,7 +140,7 @@ xfs_attr3_node_inactive(
xfs_daddr_t parent_blkno, child_blkno;
struct xfs_buf *child_bp;
struct xfs_da3_icnode_hdr ichdr;
- int error, i;
+ int error;
/*
* Since this code is recursive (gasp!) we must protect ourselves.
@@ -152,7 +152,7 @@ xfs_attr3_node_inactive(
return -EFSCORRUPTED;
}
- xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr);
+ xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr);
parent_blkno = xfs_buf_daddr(bp);
if (!ichdr.count) {
xfs_trans_brelse(*trans, bp);
@@ -167,7 +167,7 @@ xfs_attr3_node_inactive(
* over the leaves removing all of them. If this is higher up
* in the tree, recurse downward.
*/
- for (i = 0; i < ichdr.count; i++) {
+ while (ichdr.count > 0) {
/*
* Read the subsidiary block to see what we have to work with.
* Don't do this in a transaction. This is a depth-first
@@ -218,29 +218,32 @@ xfs_attr3_node_inactive(
xfs_trans_binval(*trans, child_bp);
child_bp = NULL;
+ error = xfs_da3_node_read_mapped(*trans, dp,
+ parent_blkno, &bp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+
/*
- * If we're not done, re-read the parent to get the next
- * child block number.
+ * Remove entry from parent node, prevents being indexed to.
*/
- if (i + 1 < ichdr.count) {
- struct xfs_da3_icnode_hdr phdr;
+ xfs_attr3_node_entry_remove(*trans, dp, bp, 0);
+
+ xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr);
+ bp = NULL;
- error = xfs_da3_node_read_mapped(*trans, dp,
- parent_blkno, &bp, XFS_ATTR_FORK);
+ if (ichdr.count > 0) {
+ /*
+ * If we're not done, get the next child block number.
+ */
+ child_fsb = be32_to_cpu(ichdr.btree[0].before);
+
+ /*
+ * Atomically commit the whole invalidate stuff.
+ */
+ error = xfs_trans_roll_inode(trans, dp);
if (error)
return error;
- xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr,
- bp->b_addr);
- child_fsb = be32_to_cpu(phdr.btree[i + 1].before);
- xfs_trans_brelse(*trans, bp);
- bp = NULL;
}
- /*
- * Atomically commit the whole invalidate stuff.
- */
- error = xfs_trans_roll_inode(trans, dp);
- if (error)
- return error;
}
return 0;
@@ -257,10 +260,8 @@ xfs_attr3_root_inactive(
struct xfs_trans **trans,
struct xfs_inode *dp)
{
- struct xfs_mount *mp = dp->i_mount;
struct xfs_da_blkinfo *info;
struct xfs_buf *bp;
- xfs_daddr_t blkno;
int error;
/*
@@ -272,7 +273,6 @@ xfs_attr3_root_inactive(
error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK);
if (error)
return error;
- blkno = xfs_buf_daddr(bp);
/*
* Invalidate the tree, even if the "tree" is only a single leaf block.
@@ -283,10 +283,26 @@ xfs_attr3_root_inactive(
case cpu_to_be16(XFS_DA_NODE_MAGIC):
case cpu_to_be16(XFS_DA3_NODE_MAGIC):
error = xfs_attr3_node_inactive(trans, dp, bp, 1);
+ /*
+ * Empty root node block are not allowed, convert it to leaf.
+ */
+ if (!error)
+ error = xfs_attr3_leaf_init(*trans, dp, 0);
+ if (!error)
+ error = xfs_trans_roll_inode(trans, dp);
break;
case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
error = xfs_attr3_leaf_inactive(trans, dp, bp);
+ /*
+ * Reinit the leaf before truncating extents so that a crash
+ * mid-truncation leaves an empty leaf rather than one with
+ * entries that may reference freed remote value blocks.
+ */
+ if (!error)
+ error = xfs_attr3_leaf_init(*trans, dp, 0);
+ if (!error)
+ error = xfs_trans_roll_inode(trans, dp);
break;
default:
xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
@@ -295,21 +311,6 @@ xfs_attr3_root_inactive(
xfs_trans_brelse(*trans, bp);
break;
}
- if (error)
- return error;
-
- /*
- * Invalidate the incore copy of the root block.
- */
- error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno,
- XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp);
- if (error)
- return error;
- xfs_trans_binval(*trans, bp); /* remove from cache */
- /*
- * Commit the invalidate and start the next transaction.
- */
- error = xfs_trans_roll_inode(trans, dp);
return error;
}
@@ -328,6 +329,7 @@ xfs_attr_inactive(
{
struct xfs_trans *trans;
struct xfs_mount *mp;
+ struct xfs_buf *bp;
int lock_mode = XFS_ILOCK_SHARED;
int error = 0;
@@ -363,10 +365,27 @@ xfs_attr_inactive(
* removal below.
*/
if (dp->i_af.if_nextents > 0) {
+ /*
+ * Invalidate and truncate all blocks but leave the root block.
+ */
error = xfs_attr3_root_inactive(&trans, dp);
if (error)
goto out_cancel;
+ error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK,
+ XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount));
+ if (error)
+ goto out_cancel;
+
+ /*
+ * Invalidate and truncate the root block and ensure that the
+ * operation is completed within a single transaction.
+ */
+ error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK);
+ if (error)
+ goto out_cancel;
+
+ xfs_trans_binval(trans, bp);
error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
if (error)
goto out_cancel;
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 354472bf45f1..deab14f31b38 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -653,7 +653,6 @@ xfs_attri_recover_work(
break;
}
if (error) {
- xfs_irele(ip);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp,
sizeof(*attrp));
return ERR_PTR(-EFSCORRUPTED);
@@ -1047,8 +1046,8 @@ xlog_recover_attri_commit_pass2(
break;
case XFS_ATTRI_OP_FLAGS_SET:
case XFS_ATTRI_OP_FLAGS_REPLACE:
- /* Log item, attr name, attr value */
- if (item->ri_total != 3) {
+ /* Log item, attr name, optional attr value */
+ if (item->ri_total != 2 + !!attri_formatp->alfi_value_len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
return -EFSCORRUPTED;
@@ -1132,52 +1131,6 @@ xlog_recover_attri_commit_pass2(
return -EFSCORRUPTED;
}
- switch (op) {
- case XFS_ATTRI_OP_FLAGS_REMOVE:
- /* Regular remove operations operate only on names. */
- if (attr_value != NULL || value_len != 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- fallthrough;
- case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
- case XFS_ATTRI_OP_FLAGS_PPTR_SET:
- case XFS_ATTRI_OP_FLAGS_SET:
- case XFS_ATTRI_OP_FLAGS_REPLACE:
- /*
- * Regular xattr set/remove/replace operations require a name
- * and do not take a newname. Values are optional for set and
- * replace.
- *
- * Name-value set/remove operations must have a name, do not
- * take a newname, and can take a value.
- */
- if (attr_name == NULL || name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- break;
- case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
- /*
- * Name-value replace operations require the caller to
- * specify the old and new names and values explicitly.
- * Values are optional.
- */
- if (attr_name == NULL || name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- if (attr_new_name == NULL || new_name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- break;
- }
-
/*
* Memory alloc failure will cause replay to abort. We attach the
* name/value buffer to the recovered incore log item and drop our
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 491e2a7053a3..65a0e69c3d08 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -125,6 +125,7 @@ xfs_qm_dquot_logitem_push(
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
struct xfs_dquot *dqp = qlip->qli_dquot;
struct xfs_buf *bp;
+ struct xfs_ail *ailp = lip->li_ailp;
uint rval = XFS_ITEM_SUCCESS;
int error;
@@ -153,7 +154,7 @@ xfs_qm_dquot_logitem_push(
goto out_unlock;
}
- spin_unlock(&lip->li_ailp->ail_lock);
+ spin_unlock(&ailp->ail_lock);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error == -EAGAIN) {
@@ -172,9 +173,13 @@ xfs_qm_dquot_logitem_push(
rval = XFS_ITEM_FLUSHING;
}
xfs_buf_relse(bp);
+ /*
+ * The buffer no longer protects the log item from reclaim, so
+ * do not reference lip after this point.
+ */
out_relock_ail:
- spin_lock(&lip->li_ailp->ail_lock);
+ spin_lock(&ailp->ail_lock);
out_unlock:
mutex_unlock(&dqp->q_qlock);
return rval;
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index d1291ca15239..2b8617ae7ec2 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -443,8 +443,8 @@ xfs_ioc_attr_list(
context.dp = dp;
context.resynch = 1;
context.attr_filter = xfs_attr_filter(flags);
- context.buffer = buffer;
context.bufsize = round_down(bufsize, sizeof(uint32_t));
+ context.buffer = buffer;
context.firstu = context.bufsize;
context.put_listent = xfs_ioc_attr_put_listent;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 50c0404f9064..beaa26ec62da 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1048,7 +1048,8 @@ xfs_itruncate_extents_flags(
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
if (icount_read(VFS_I(ip)))
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
- ASSERT(new_size <= XFS_ISIZE(ip));
+ if (whichfork == XFS_DATA_FORK)
+ ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 8913036b8024..4ae81eed0442 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -746,6 +746,7 @@ xfs_inode_item_push(
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
struct xfs_buf *bp = lip->li_buf;
+ struct xfs_ail *ailp = lip->li_ailp;
uint rval = XFS_ITEM_SUCCESS;
int error;
@@ -771,7 +772,7 @@ xfs_inode_item_push(
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
- spin_unlock(&lip->li_ailp->ail_lock);
+ spin_unlock(&ailp->ail_lock);
/*
* We need to hold a reference for flushing the cluster buffer as it may
@@ -795,7 +796,11 @@ xfs_inode_item_push(
rval = XFS_ITEM_LOCKED;
}
- spin_lock(&lip->li_ailp->ail_lock);
+ /*
+ * The buffer no longer protects the log item from reclaim, so
+ * do not reference lip after this point.
+ */
+ spin_lock(&ailp->ail_lock);
return rval;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9c295abd0a0a..ef1ea8a1238c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -608,8 +608,9 @@ xfs_unmount_check(
* have been retrying in the background. This will prevent never-ending
* retries in AIL pushing from hanging the unmount.
*
- * Finally, we can push the AIL to clean all the remaining dirty objects, then
- * reclaim the remaining inodes that are still in memory at this point in time.
+ * Stop inodegc and background reclaim before pushing the AIL so that they
+ * are not running while the AIL is being flushed. Then push the AIL to
+ * clean all the remaining dirty objects and reclaim the remaining inodes.
*/
static void
xfs_unmount_flush_inodes(
@@ -621,9 +622,9 @@ xfs_unmount_flush_inodes(
xfs_set_unmounting(mp);
- xfs_ail_push_all_sync(mp->m_ail);
xfs_inodegc_stop(mp);
cancel_delayed_work_sync(&mp->m_reclaim_work);
+ xfs_ail_push_all_sync(mp->m_ail);
xfs_reclaim_inodes(mp);
xfs_health_unmount(mp);
xfs_healthmon_unmount(mp);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 813e5a9f57eb..5e8190fe2be9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -56,6 +56,7 @@
#include <linux/tracepoint.h>
struct xfs_agf;
+struct xfs_ail;
struct xfs_alloc_arg;
struct xfs_attr_list_context;
struct xfs_buf_log_item;
@@ -1650,16 +1651,43 @@ TRACE_EVENT(xfs_log_force,
DEFINE_EVENT(xfs_log_item_class, name, \
TP_PROTO(struct xfs_log_item *lip), \
TP_ARGS(lip))
-DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin);
DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort);
DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort);
+DECLARE_EVENT_CLASS(xfs_ail_push_class,
+ TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn),
+ TP_ARGS(ailp, type, flags, lsn),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(uint, type)
+ __field(unsigned long, flags)
+ __field(xfs_lsn_t, lsn)
+ ),
+ TP_fast_assign(
+ __entry->dev = ailp->ail_log->l_mp->m_super->s_dev;
+ __entry->type = type;
+ __entry->flags = flags;
+ __entry->lsn = lsn;
+ ),
+ TP_printk("dev %d:%d lsn %d/%d type %s flags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn),
+ __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+ __print_flags(__entry->flags, "|", XFS_LI_FLAGS))
+)
+
+#define DEFINE_AIL_PUSH_EVENT(name) \
+DEFINE_EVENT(xfs_ail_push_class, name, \
+ TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), \
+ TP_ARGS(ailp, type, flags, lsn))
+DEFINE_AIL_PUSH_EVENT(xfs_ail_push);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_pinned);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_locked);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_flushing);
+
DECLARE_EVENT_CLASS(xfs_ail_class,
TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
TP_ARGS(lip, old_lsn, new_lsn),
@@ -5091,23 +5119,16 @@ TRACE_EVENT(xmbuf_create,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
- __array(char, pathname, MAXNAMELEN)
),
TP_fast_assign(
- char *path;
struct file *file = btp->bt_file;
__entry->dev = btp->bt_mount->m_super->s_dev;
__entry->ino = file_inode(file)->i_ino;
- path = file_path(file, __entry->pathname, MAXNAMELEN);
- if (IS_ERR(path))
- strncpy(__entry->pathname, "(unknown)",
- sizeof(__entry->pathname));
),
- TP_printk("dev %d:%d xmino 0x%lx path '%s'",
+ TP_printk("dev %d:%d xmino 0x%lx",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->pathname)
+ __entry->ino)
);
TRACE_EVENT(xmbuf_free,
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 923729af4206..99a9bf3762b7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -365,6 +365,12 @@ xfsaild_resubmit_item(
return XFS_ITEM_SUCCESS;
}
+/*
+ * Push a single log item from the AIL.
+ *
+ * @lip may have been released and freed by the time this function returns,
+ * so callers must not dereference the log item afterwards.
+ */
static inline uint
xfsaild_push_item(
struct xfs_ail *ailp,
@@ -458,6 +464,74 @@ xfs_ail_calc_push_target(
return target_lsn;
}
+static void
+xfsaild_process_logitem(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip,
+ int *stuck,
+ int *flushing)
+{
+ struct xfs_mount *mp = ailp->ail_log->l_mp;
+ uint type = lip->li_type;
+ unsigned long flags = lip->li_flags;
+ xfs_lsn_t item_lsn = lip->li_lsn;
+ int lock_result;
+
+ /*
+ * Note that iop_push may unlock and reacquire the AIL lock. We
+ * rely on the AIL cursor implementation to be able to deal with
+ * the dropped lock.
+ *
+ * The log item may have been freed by the push, so it must not
+ * be accessed or dereferenced below this line.
+ */
+ lock_result = xfsaild_push_item(ailp, lip);
+ switch (lock_result) {
+ case XFS_ITEM_SUCCESS:
+ XFS_STATS_INC(mp, xs_push_ail_success);
+ trace_xfs_ail_push(ailp, type, flags, item_lsn);
+
+ ailp->ail_last_pushed_lsn = item_lsn;
+ break;
+
+ case XFS_ITEM_FLUSHING:
+ /*
+ * The item or its backing buffer is already being
+ * flushed. The typical reason for that is that an
+ * inode buffer is locked because we already pushed the
+ * updates to it as part of inode clustering.
+ *
+ * We do not want to stop flushing just because lots
+ * of items are already being flushed, but we need to
+ * re-try the flushing relatively soon if most of the
+ * AIL is being flushed.
+ */
+ XFS_STATS_INC(mp, xs_push_ail_flushing);
+ trace_xfs_ail_flushing(ailp, type, flags, item_lsn);
+
+ (*flushing)++;
+ ailp->ail_last_pushed_lsn = item_lsn;
+ break;
+
+ case XFS_ITEM_PINNED:
+ XFS_STATS_INC(mp, xs_push_ail_pinned);
+ trace_xfs_ail_pinned(ailp, type, flags, item_lsn);
+
+ (*stuck)++;
+ ailp->ail_log_flush++;
+ break;
+ case XFS_ITEM_LOCKED:
+ XFS_STATS_INC(mp, xs_push_ail_locked);
+ trace_xfs_ail_locked(ailp, type, flags, item_lsn);
+
+ (*stuck)++;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+}
+
static long
xfsaild_push(
struct xfs_ail *ailp)
@@ -505,62 +579,11 @@ xfsaild_push(
lsn = lip->li_lsn;
while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) {
- int lock_result;
if (test_bit(XFS_LI_FLUSHING, &lip->li_flags))
goto next_item;
- /*
- * Note that iop_push may unlock and reacquire the AIL lock. We
- * rely on the AIL cursor implementation to be able to deal with
- * the dropped lock.
- */
- lock_result = xfsaild_push_item(ailp, lip);
- switch (lock_result) {
- case XFS_ITEM_SUCCESS:
- XFS_STATS_INC(mp, xs_push_ail_success);
- trace_xfs_ail_push(lip);
-
- ailp->ail_last_pushed_lsn = lsn;
- break;
-
- case XFS_ITEM_FLUSHING:
- /*
- * The item or its backing buffer is already being
- * flushed. The typical reason for that is that an
- * inode buffer is locked because we already pushed the
- * updates to it as part of inode clustering.
- *
- * We do not want to stop flushing just because lots
- * of items are already being flushed, but we need to
- * re-try the flushing relatively soon if most of the
- * AIL is being flushed.
- */
- XFS_STATS_INC(mp, xs_push_ail_flushing);
- trace_xfs_ail_flushing(lip);
-
- flushing++;
- ailp->ail_last_pushed_lsn = lsn;
- break;
-
- case XFS_ITEM_PINNED:
- XFS_STATS_INC(mp, xs_push_ail_pinned);
- trace_xfs_ail_pinned(lip);
-
- stuck++;
- ailp->ail_log_flush++;
- break;
- case XFS_ITEM_LOCKED:
- XFS_STATS_INC(mp, xs_push_ail_locked);
- trace_xfs_ail_locked(lip);
-
- stuck++;
- break;
- default:
- ASSERT(0);
- break;
- }
-
+ xfsaild_process_logitem(ailp, lip, &stuck, &flushing);
count++;
/*
diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c
index 8bbd4ec567f8..5ead3976d511 100644
--- a/fs/xfs/xfs_verify_media.c
+++ b/fs/xfs/xfs_verify_media.c
@@ -183,10 +183,9 @@ xfs_verify_iosize(
min_not_zero(SZ_1M, me->me_max_io_size);
BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT);
- ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev));
+ ASSERT(BBTOB(bbcount) >= btp->bt_logical_sectorsize);
- return clamp(iosize, bdev_logical_block_size(btp->bt_bdev),
- BBTOB(bbcount));
+ return clamp(iosize, btp->bt_logical_sectorsize, BBTOB(bbcount));
}
/* Allocate as much memory as we can get for verification buffer. */
@@ -218,8 +217,8 @@ xfs_verify_media_error(
unsigned int bio_bbcount,
blk_status_t bio_status)
{
- trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr,
- bio_bbcount, bio_status);
+ trace_xfs_verify_media_error(mp, me, btp->bt_dev, daddr, bio_bbcount,
+ bio_status);
/*
* Pass any error, I/O or otherwise, up to the caller if we didn't
@@ -280,7 +279,7 @@ xfs_verify_media(
btp = mp->m_ddev_targp;
break;
case XFS_DEV_LOG:
- if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev)
+ if (mp->m_logdev_targp != mp->m_ddev_targp)
btp = mp->m_logdev_targp;
break;
case XFS_DEV_RT:
@@ -299,7 +298,7 @@ xfs_verify_media(
/* start and end have to be aligned to the lba size */
if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr),
- bdev_logical_block_size(btp->bt_bdev)))
+ btp->bt_logical_sectorsize))
return -EINVAL;
/*
@@ -331,8 +330,7 @@ xfs_verify_media(
if (!folio)
return -ENOMEM;
- trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount,
- folio);
+ trace_xfs_verify_media(mp, me, btp->bt_dev, daddr, bbcount, folio);
bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL);
if (!bio) {
@@ -400,7 +398,7 @@ out_folio:
* an operational error.
*/
me->me_start_daddr = daddr;
- trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev);
+ trace_xfs_verify_media_end(mp, me, btp->bt_dev);
return 0;
}
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index a735f16d9cd8..544213067d59 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -332,8 +332,8 @@ xfs_vn_listxattr(
memset(&context, 0, sizeof(context));
context.dp = XFS_I(inode);
context.resynch = 1;
- context.buffer = size ? data : NULL;
context.bufsize = size;
+ context.buffer = size ? data : NULL;
context.firstu = context.bufsize;
context.put_listent = xfs_xattr_put_listent;
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 107b797c33ec..0cc8fa749f68 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -230,9 +230,8 @@ static inline bool af_alg_readable(struct sock *sk)
return PAGE_SIZE <= af_alg_rcvbuf(sk);
}
-unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset);
-void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
- size_t dst_offset);
+unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes);
+void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst);
void af_alg_wmem_wakeup(struct sock *sk);
int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min);
int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05b34a6355b0..35b1e25bd104 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1854,6 +1854,10 @@ struct bpf_link_ops {
* target hook is sleepable, we'll go through tasks trace RCU GP and
* then "classic" RCU GP; this need for chaining tasks trace and
* classic RCU GPs is designated by setting bpf_link->sleepable flag
+ *
+ * For non-sleepable tracepoint links we go through SRCU gp instead,
+ * since RCU is not used in that case. Sleepable tracepoints still
+ * follow the scheme above.
*/
void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index bb92f5c169ca..7f87399938fa 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -609,6 +609,9 @@ struct cgroup {
/* used to wait for offlining of csses */
wait_queue_head_t offline_waitq;
+ /* used by cgroup_rmdir() to wait for dying tasks to leave */
+ wait_queue_head_t dying_populated_waitq;
+
/* used to schedule release agent */
struct work_struct release_agent_work;
diff --git a/include/linux/damon.h b/include/linux/damon.h
index a4fea23da857..be3d198043ff 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -810,6 +810,12 @@ struct damon_ctx {
struct damos_walk_control *walk_control;
struct mutex walk_control_lock;
+ /*
+ * indicate if this may be corrupted. Currentonly this is set only for
+ * damon_commit_ctx() failure.
+ */
+ bool maybe_corrupted;
+
/* Working thread of the given DAMON context */
struct task_struct *kdamond;
/* Protects @kdamond field access */
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 29973baa0581..99ef042ecdb4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -80,11 +80,18 @@
#define DMA_ATTR_MMIO (1UL << 10)
/*
- * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline
- * overlapping this buffer while it is mapped for DMA. All mappings sharing
- * a cacheline must have this attribute for this to be considered safe.
+ * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be
+ * overlapped. All mappings sharing a cacheline must have this attribute for
+ * this to be considered safe.
*/
-#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11)
+#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES (1UL << 11)
+
+/*
+ * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required.
+ * All mappings that carry this attribute can't work with SWIOTLB and cache
+ * flushing.
+ */
+#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12)
/*
* A dma_addr_t can hold any valid DMA or bus address for the platform. It can
@@ -248,8 +255,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
{
return NULL;
}
-static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_handle, unsigned long attrs)
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
}
static inline void *dmam_alloc_attrs(struct device *dev, size_t size,
diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h
index fa7638b81246..383050e7fdf5 100644
--- a/include/linux/fs/super_types.h
+++ b/include/linux/fs/super_types.h
@@ -338,5 +338,6 @@ struct super_block {
#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */
#define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */
+#define SB_I_NO_DATA_INTEGRITY 0x00008000 /* fs cannot guarantee data persistence on sync */
#endif /* _LINUX_FS_SUPER_TYPES_H */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 54b8b48c762e..555597b54083 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -980,7 +980,8 @@ static inline void iommu_flush_iotlb_all(struct iommu_domain *domain)
static inline void iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather)
{
- if (domain->ops->iotlb_sync)
+ if (domain->ops->iotlb_sync &&
+ likely(iotlb_gather->start < iotlb_gather->end))
domain->ops->iotlb_sync(domain, iotlb_gather);
iommu_iotlb_gather_init(iotlb_gather);
diff --git a/include/linux/leafops.h b/include/linux/leafops.h
index a9ff94b744f2..05673d3529e7 100644
--- a/include/linux/leafops.h
+++ b/include/linux/leafops.h
@@ -363,6 +363,23 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry)
return swp_offset(entry) & SWP_PFN_MASK;
}
+static inline void softleaf_migration_sync(softleaf_t entry,
+ struct folio *folio)
+{
+ /*
+ * Ensure we do not race with split, which might alter tail pages into new
+ * folios and thus result in observing an unlocked folio.
+ * This matches the write barrier in __split_folio_to_order().
+ */
+ smp_rmb();
+
+ /*
+ * Any use of migration entries may only occur while the
+ * corresponding page is locked
+ */
+ VM_WARN_ON_ONCE(!folio_test_locked(folio));
+}
+
/**
* softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry.
* @entry: Leaf entry, softleaf_has_pfn(@entry) must return true.
@@ -374,11 +391,8 @@ static inline struct page *softleaf_to_page(softleaf_t entry)
struct page *page = pfn_to_page(softleaf_to_pfn(entry));
VM_WARN_ON_ONCE(!softleaf_has_pfn(entry));
- /*
- * Any use of migration entries may only occur while the
- * corresponding page is locked
- */
- VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page));
+ if (softleaf_is_migration(entry))
+ softleaf_migration_sync(entry, page_folio(page));
return page;
}
@@ -394,12 +408,8 @@ static inline struct folio *softleaf_to_folio(softleaf_t entry)
struct folio *folio = pfn_folio(softleaf_to_pfn(entry));
VM_WARN_ON_ONCE(!softleaf_has_pfn(entry));
- /*
- * Any use of migration entries may only occur while the
- * corresponding folio is locked.
- */
- VM_WARN_ON_ONCE(softleaf_is_migration(entry) &&
- !folio_test_locked(folio));
+ if (softleaf_is_migration(entry))
+ softleaf_migration_sync(entry, folio);
return folio;
}
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0fe96f3ab3ef..65c732d440d2 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -55,6 +55,7 @@ struct mempolicy {
nodemask_t cpuset_mems_allowed; /* relative to these nodes */
nodemask_t user_nodemask; /* nodemask passed by user */
} w;
+ struct rcu_head rcu;
};
/*
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 1bdc39daac0a..358946990bfa 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -17,7 +17,14 @@ struct readahead_control;
void mpage_readahead(struct readahead_control *, get_block_t get_block);
int mpage_read_folio(struct folio *folio, get_block_t get_block);
-int mpage_writepages(struct address_space *mapping,
- struct writeback_control *wbc, get_block_t get_block);
+int __mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t get_block,
+ int (*write_folio)(struct folio *folio,
+ struct writeback_control *wbc));
+static inline int mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t get_block)
+{
+ return __mpage_writepages(mapping, wbc, get_block, NULL);
+}
#endif
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index e9f4f845d760..b98331572ad2 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -309,7 +309,7 @@ enum {
/* register and unregister set references */
extern ip_set_id_t ip_set_get_byname(struct net *net,
- const char *name, struct ip_set **set);
+ const struct nlattr *name, struct ip_set **set);
extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 72ee7d210a74..ba17ac5bf356 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -140,7 +140,6 @@ struct netfs_io_stream {
void (*issue_write)(struct netfs_io_subrequest *subreq);
/* Collection tracking */
struct list_head subrequests; /* Contributory I/O operations */
- struct netfs_io_subrequest *front; /* Op being collected */
unsigned long long collected_to; /* Position we've collected results to */
size_t transferred; /* The amount transferred from this stream */
unsigned short error; /* Aggregate error for the stream */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ec442af3f886..31a848485ad9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -210,7 +210,6 @@ enum mapping_flags {
AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't
account usage to user cgroups */
- AS_NO_DATA_INTEGRITY = 11, /* no data integrity guarantees */
/* Bits 16-25 are used for FOLIO_ORDER */
AS_FOLIO_ORDER_BITS = 5,
AS_FOLIO_ORDER_MIN = 16,
@@ -346,16 +345,6 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct addres
return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
}
-static inline void mapping_set_no_data_integrity(struct address_space *mapping)
-{
- set_bit(AS_NO_DATA_INTEGRITY, &mapping->flags);
-}
-
-static inline bool mapping_no_data_integrity(const struct address_space *mapping)
-{
- return test_bit(AS_NO_DATA_INTEGRITY, &mapping->flags);
-}
-
static inline gfp_t mapping_gfp_mask(const struct address_space *mapping)
{
return mapping->gfp_mask;
diff --git a/include/linux/security.h b/include/linux/security.h
index 83a646d72f6f..ee88dd2d2d1f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -145,6 +145,7 @@ enum lockdown_reason {
LOCKDOWN_BPF_WRITE_USER,
LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_RTAS_ERROR_INJECTION,
+ LOCKDOWN_XEN_USER_ACTIONS,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index daa4e4944ce3..2f278ce376b7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -5097,6 +5097,7 @@ static inline bool skb_has_extensions(struct sk_buff *skb)
return unlikely(skb->active_extensions);
}
#else
+static inline void __skb_ext_put(struct skb_ext *ext) {}
static inline void skb_ext_put(struct sk_buff *skb) {}
static inline void skb_ext_reset(struct sk_buff *skb) {}
static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index af7cfee7b8f6..0dc671c07d3a 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -159,10 +159,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
* @modalias: Name of the driver to use with this device, or an alias
* for that name. This appears in the sysfs "modalias" attribute
* for driver coldplugging, and in uevents used for hotplugging
- * @driver_override: If the name of a driver is written to this attribute, then
- * the device will bind to the named driver and only the named driver.
- * Do not set directly, because core frees it; use driver_set_override() to
- * set or clear it.
* @pcpu_statistics: statistics for the spi_device
* @word_delay: delay to be inserted between consecutive
* words of a transfer
@@ -224,7 +220,6 @@ struct spi_device {
void *controller_state;
void *controller_data;
char modalias[SPI_NAME_SIZE];
- const char *driver_override;
/* The statistics */
struct spi_statistics __percpu *pcpu_statistics;
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index dec7cbe015aa..905b629e8fa3 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -11,6 +11,7 @@
#ifndef _LINUX_SRCU_TINY_H
#define _LINUX_SRCU_TINY_H
+#include <linux/irq_work_types.h>
#include <linux/swait.h>
struct srcu_struct {
@@ -24,18 +25,21 @@ struct srcu_struct {
struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */
struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */
struct work_struct srcu_work; /* For driving grace periods. */
+ struct irq_work srcu_irq_work; /* Defer schedule_work() to irq work. */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
};
void srcu_drive_gp(struct work_struct *wp);
+void srcu_tiny_irq_work(struct irq_work *irq_work);
#define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored) \
{ \
.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \
.srcu_cb_tail = &name.srcu_cb_head, \
.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \
+ .srcu_irq_work = { .func = srcu_tiny_irq_work }, \
__SRCU_DEP_MAP_INIT(name) \
}
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 958cb7ef41cb..be76fa4fc170 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -34,7 +34,7 @@ struct srcu_data {
/* Values: SRCU_READ_FLAVOR_.* */
/* Update-side state. */
- spinlock_t __private lock ____cacheline_internodealigned_in_smp;
+ raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
struct rcu_segcblist srcu_cblist; /* List of callbacks.*/
unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
@@ -55,7 +55,7 @@ struct srcu_data {
* Node in SRCU combining tree, similar in function to rcu_data.
*/
struct srcu_node {
- spinlock_t __private lock;
+ raw_spinlock_t __private lock;
unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */
/* if greater than ->srcu_gp_seq. */
unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */
@@ -74,7 +74,7 @@ struct srcu_usage {
/* First node at each level. */
int srcu_size_state; /* Small-to-big transition state. */
struct mutex srcu_cb_mutex; /* Serialize CB preparation. */
- spinlock_t __private lock; /* Protect counters and size state. */
+ raw_spinlock_t __private lock; /* Protect counters and size state. */
struct mutex srcu_gp_mutex; /* Serialize GP work. */
unsigned long srcu_gp_seq; /* Grace-period seq #. */
unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */
@@ -95,6 +95,7 @@ struct srcu_usage {
unsigned long reschedule_jiffies;
unsigned long reschedule_count;
struct delayed_work work;
+ struct irq_work irq_work;
struct srcu_struct *srcu_ssp;
};
@@ -156,7 +157,7 @@ struct srcu_struct {
#define __SRCU_USAGE_INIT(name) \
{ \
- .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
.srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \
.srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \
.srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 22ca1c8b54f3..1d7f29f5e901 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
{
return tp->ext && tp->ext->faultable;
}
+/*
+ * Run RCU callback with the appropriate grace period wait for non-faultable
+ * tracepoints, e.g., those used in atomic context.
+ */
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{
+ call_srcu(&tracepoint_srcu, rcu, func);
+}
+/*
+ * Run RCU callback with the appropriate grace period wait for faultable
+ * tracepoints, e.g., those used in syscall context.
+ */
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{
+ call_rcu_tasks_trace(rcu, func);
+}
#else
static inline void tracepoint_synchronize_unregister(void)
{ }
@@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
{
return false;
}
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{ }
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{ }
#endif
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 75dabb763c65..f36d21b5bc19 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -207,6 +207,39 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type);
}
+/* This function must be called after virtio_net_hdr_from_skb(). */
+static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb,
+ struct virtio_net_hdr *hdr,
+ bool little_endian)
+{
+ u16 hdr_len;
+
+ hdr_len = skb_transport_offset(skb);
+
+ if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4)
+ hdr_len += sizeof(struct udphdr);
+ else
+ hdr_len += tcp_hdrlen(skb);
+
+ hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len);
+}
+
+/* This function must be called after virtio_net_hdr_from_skb(). */
+static inline void __virtio_net_set_tnl_hdrlen(const struct sk_buff *skb,
+ struct virtio_net_hdr *hdr)
+{
+ u16 hdr_len;
+
+ hdr_len = skb_inner_transport_offset(skb);
+
+ if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4)
+ hdr_len += sizeof(struct udphdr);
+ else
+ hdr_len += inner_tcp_hdrlen(skb);
+
+ hdr->hdr_len = __cpu_to_virtio16(true, hdr_len);
+}
+
static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
struct virtio_net_hdr *hdr,
bool little_endian,
@@ -385,7 +418,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
bool tnl_hdr_negotiated,
bool little_endian,
int vlan_hlen,
- bool has_data_valid)
+ bool has_data_valid,
+ bool feature_hdrlen)
{
struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr;
unsigned int inner_nh, outer_th;
@@ -394,9 +428,17 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM);
- if (!tnl_gso_type)
- return virtio_net_hdr_from_skb(skb, hdr, little_endian,
- has_data_valid, vlan_hlen);
+ if (!tnl_gso_type) {
+ ret = virtio_net_hdr_from_skb(skb, hdr, little_endian,
+ has_data_valid, vlan_hlen);
+ if (ret)
+ return ret;
+
+ if (feature_hdrlen && hdr->hdr_len)
+ __virtio_net_set_hdrlen(skb, hdr, little_endian);
+
+ return ret;
+ }
/* Tunnel support not negotiated but skb ask for it. */
if (!tnl_hdr_negotiated)
@@ -414,6 +456,9 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
if (ret)
return ret;
+ if (feature_hdrlen && hdr->hdr_len)
+ __virtio_net_set_tnl_hdrlen(skb, hdr);
+
if (skb->protocol == htons(ETH_P_IPV6))
hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
else
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 010f1a8fd15f..5172afee5494 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -658,6 +658,7 @@ struct l2cap_conn {
struct sk_buff *rx_skb;
__u32 rx_len;
struct ida tx_ida;
+ __u8 tx_ident;
struct sk_buff_head pending_rx;
struct work_struct pending_rx_work;
diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
index 78a27ac73070..b2c359c6dd1b 100644
--- a/include/net/codel_impl.h
+++ b/include/net/codel_impl.h
@@ -158,6 +158,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
bool drop;
if (!skb) {
+ vars->first_above_time = 0;
vars->dropping = false;
return skb;
}
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5a979dcab538..6d936e9f2fd3 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -264,6 +264,20 @@ inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk,
return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
}
+static inline bool inet_use_hash2_on_bind(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ return false;
+
+ if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return true;
+ }
+#endif
+ return sk->sk_rcv_saddr != htonl(INADDR_ANY);
+}
+
struct inet_bind_hashbucket *
inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 88b0dd4d8e09..9f8b6814a96a 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -507,12 +507,14 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
+void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args,
+ unsigned long now);
void fib6_run_gc(unsigned long expires, struct net *net, bool force);
-
void fib6_gc_cleanup(void);
int fib6_init(void);
+#if IS_ENABLED(CONFIG_IPV6)
/* Add the route to the gc list if it is not already there
*
* The callers should hold f6i->fib6_table->tb6_lock.
@@ -545,6 +547,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i)
hlist_del_init(&f6i->gc_link);
}
+static inline void fib6_may_remove_gc_list(struct net *net,
+ struct fib6_info *f6i)
+{
+ struct fib6_gc_args gc_args;
+
+ if (hlist_unhashed(&f6i->gc_link))
+ return;
+
+ gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval);
+ gc_args.more = 0;
+
+ rcu_read_lock();
+ fib6_age_exceptions(f6i, &gc_args, jiffies);
+ rcu_read_unlock();
+}
+#endif
+
struct ipv6_route_iter {
struct seq_net_private p;
struct fib6_walker w;
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3384859a8921..8883575adcc1 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock);
extern spinlock_t nf_conntrack_expect_lock;
+static inline void lockdep_nfct_expect_lock_held(void)
+{
+ lockdep_assert_held(&nf_conntrack_expect_lock);
+}
+
/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 165e7a03b8e9..e9a8350e7ccf 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -22,10 +22,16 @@ struct nf_conntrack_expect {
/* Hash member */
struct hlist_node hnode;
+ /* Network namespace */
+ possible_net_t net;
+
/* We expect this tuple, with the following mask */
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_mask mask;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ struct nf_conntrack_zone zone;
+#endif
/* Usage count. */
refcount_t use;
@@ -40,7 +46,7 @@ struct nf_conntrack_expect {
struct nf_conntrack_expect *this);
/* Helper to assign to new connection */
- struct nf_conntrack_helper *helper;
+ struct nf_conntrack_helper __rcu *helper;
/* The conntrack of the master connection */
struct nf_conn *master;
@@ -62,7 +68,17 @@ struct nf_conntrack_expect {
static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
{
- return nf_ct_net(exp->master);
+ return read_pnet(&exp->net);
+}
+
+static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_zone *b)
+{
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ return a->zone.id == b->id;
+#else
+ return true;
+#endif
}
#define NF_CT_EXP_POLICY_NAME_LEN 16
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index 6682e51513ef..2073cbac2afb 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -17,6 +17,7 @@ struct netns_mpls {
size_t platform_labels;
struct mpls_route __rcu * __rcu *platform_label;
struct mutex platform_mutex;
+ seqcount_mutex_t platform_label_seq;
struct ctl_table_header *ctl;
};
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 23dd647fe024..b73983a17e08 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -59,7 +59,7 @@ struct netns_xfrm {
struct list_head inexact_bins;
- struct sock *nlsk;
+ struct sock __rcu *nlsk;
struct sock *nlsk_stash;
u32 sysctl_aevent_etime;
diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h
index 79bd5a7a0f88..0e871c786513 100644
--- a/include/sound/sdca_function.h
+++ b/include/sound/sdca_function.h
@@ -27,11 +27,6 @@ struct sdca_function_desc;
#define SDCA_MAX_ENTITY_COUNT 128
/*
- * Sanity check on number of initialization writes, can be expanded if needed.
- */
-#define SDCA_MAX_INIT_COUNT 2048
-
-/*
* The Cluster IDs are 16-bit, so a maximum of 65535 Clusters per
* function can be represented, however limit this to a slightly
* more reasonable value. Can be expanded if needed.
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 125bdc166bfe..0864700f76e0 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -769,12 +769,15 @@ TRACE_EVENT(btrfs_sync_file,
),
TP_fast_assign(
- const struct dentry *dentry = file->f_path.dentry;
- const struct inode *inode = d_inode(dentry);
+ struct dentry *dentry = file_dentry(file);
+ struct inode *inode = file_inode(file);
+ struct dentry *parent = dget_parent(dentry);
+ struct inode *parent_inode = d_inode(parent);
- TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb));
+ dput(parent);
+ TP_fast_assign_fsid(btrfs_sb(inode->i_sb));
__entry->ino = btrfs_ino(BTRFS_I(inode));
- __entry->parent = btrfs_ino(BTRFS_I(d_inode(dentry->d_parent)));
+ __entry->parent = btrfs_ino(BTRFS_I(parent_inode));
__entry->datasync = datasync;
__entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root);
),
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 33e99e792f1a..63597b004424 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -32,7 +32,9 @@ TRACE_DEFINE_ENUM(DMA_NONE);
{ DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \
{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
- { DMA_ATTR_MMIO, "MMIO" })
+ { DMA_ATTR_MMIO, "MMIO" }, \
+ { DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
+ { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" })
DECLARE_EVENT_CLASS(dma_map,
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 2d366be46a1c..cbe28211106c 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -740,19 +740,19 @@ TRACE_EVENT(netfs_collect_stream,
__field(unsigned int, wreq)
__field(unsigned char, stream)
__field(unsigned long long, collected_to)
- __field(unsigned long long, front)
+ __field(unsigned long long, issued_to)
),
TP_fast_assign(
__entry->wreq = wreq->debug_id;
__entry->stream = stream->stream_nr;
__entry->collected_to = stream->collected_to;
- __entry->front = stream->front ? stream->front->start : UINT_MAX;
+ __entry->issued_to = atomic64_read(&wreq->issued_to);
),
- TP_printk("R=%08x[%x:] cto=%llx frn=%llx",
+ TP_printk("R=%08x[%x:] cto=%llx ito=%llx",
__entry->wreq, __entry->stream,
- __entry->collected_to, __entry->front)
+ __entry->collected_to, __entry->issued_to)
);
TRACE_EVENT(netfs_folioq,
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 26071021e986..56b6b60a814f 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -159,5 +159,9 @@ enum ip_conntrack_expect_events {
#define NF_CT_EXPECT_INACTIVE 0x2
#define NF_CT_EXPECT_USERSPACE 0x4
+#ifdef __KERNEL__
+#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \
+ NF_CT_EXPECT_USERSPACE)
+#endif
#endif /* _UAPI_NF_CONNTRACK_COMMON_H */
diff --git a/init/Kconfig b/init/Kconfig
index 444ce811ea67..7484cd703bc1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -146,7 +146,7 @@ config CC_HAS_COUNTED_BY
config CC_HAS_COUNTED_BY_PTR
bool
# supported since clang 22
- default y if CC_IS_CLANG && CLANG_VERSION >= 220000
+ default y if CC_IS_CLANG && CLANG_VERSION >= 220100
# supported since gcc 16.0.0
default y if CC_IS_GCC && GCC_VERSION >= 160000
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 80178b69e05a..c2d3e45544bb 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -119,12 +119,14 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
sq_idx);
break;
}
- if ((++sq_head & sq_mask) == 0) {
+ if (sq_idx == sq_mask) {
seq_printf(m,
"%5u: corrupted sqe, wrapping 128B entry\n",
sq_idx);
break;
}
+ sq_head++;
+ i++;
sqe128 = true;
}
seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, "
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e1505c9cd09e..810991709da7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3261,6 +3261,18 @@ static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
bpf_link_dealloc(link);
}
+static bool bpf_link_is_tracepoint(struct bpf_link *link)
+{
+ /*
+ * Only these combinations support a tracepoint bpf_link.
+ * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use
+ * bpf_raw_tp_link_lops and thus dealloc_deferred(), see
+ * bpf_raw_tp_link_attach().
+ */
+ return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT ||
+ (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
+}
+
static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
{
if (rcu_trace_implies_rcu_gp())
@@ -3279,16 +3291,25 @@ static void bpf_link_free(struct bpf_link *link)
if (link->prog)
ops->release(link);
if (ops->dealloc_deferred) {
- /* Schedule BPF link deallocation, which will only then
+ /*
+ * Schedule BPF link deallocation, which will only then
* trigger putting BPF program refcount.
* If underlying BPF program is sleepable or BPF link's target
* attach hookpoint is sleepable or otherwise requires RCU GPs
* to ensure link and its underlying BPF program is not
* reachable anymore, we need to first wait for RCU tasks
- * trace sync, and then go through "classic" RCU grace period
+ * trace sync, and then go through "classic" RCU grace period.
+ *
+ * For tracepoint BPF links, we need to go through SRCU grace
+ * period wait instead when non-faultable tracepoint is used. We
+ * don't need to chain SRCU grace period waits, however, for the
+ * faultable case, since it exclusively uses RCU Tasks Trace.
*/
if (link->sleepable || (link->prog && link->prog->sleepable))
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ /* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
+ else if (bpf_link_is_tracepoint(link))
+ call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
else
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
} else if (ops->dealloc) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a431b7d50e1b..5434b162c930 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -642,6 +642,13 @@ static bool is_atomic_load_insn(const struct bpf_insn *insn)
insn->imm == BPF_LOAD_ACQ;
}
+static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_ATOMIC &&
+ (insn->imm & BPF_FETCH);
+}
+
static int __get_spi(s32 off)
{
return (-off - 1) / BPF_REG_SIZE;
@@ -4499,10 +4506,24 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* dreg still needs precision before this insn
*/
}
- } else if (class == BPF_LDX || is_atomic_load_insn(insn)) {
- if (!bt_is_reg_set(bt, dreg))
+ } else if (class == BPF_LDX ||
+ is_atomic_load_insn(insn) ||
+ is_atomic_fetch_insn(insn)) {
+ u32 load_reg = dreg;
+
+ /*
+ * Atomic fetch operation writes the old value into
+ * a register (sreg or r0) and if it was tracked for
+ * precision, propagate to the stack slot like we do
+ * in regular ldx.
+ */
+ if (is_atomic_fetch_insn(insn))
+ load_reg = insn->imm == BPF_CMPXCHG ?
+ BPF_REG_0 : sreg;
+
+ if (!bt_is_reg_set(bt, load_reg))
return 0;
- bt_clear_reg(bt, dreg);
+ bt_clear_reg(bt, load_reg);
/* scalars can only be spilled into stack w/o losing precision.
* Load from any other memory can be zero extended.
@@ -7996,7 +8017,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
} else if (reg->type == CONST_PTR_TO_MAP) {
err = check_ptr_to_map_access(env, regs, regno, off, size, t,
value_regno);
- } else if (base_type(reg->type) == PTR_TO_BUF) {
+ } else if (base_type(reg->type) == PTR_TO_BUF &&
+ !type_may_be_null(reg->type)) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
u32 *max_access;
@@ -20094,8 +20116,13 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
* since someone could have accessed through (ptr - k), or
* even done ptr -= k in a register, to get a safe access.
*/
- if (rold->range > rcur->range)
+ if (rold->range < 0 || rcur->range < 0) {
+ /* special case for [BEYOND|AT]_PKT_END */
+ if (rold->range != rcur->range)
+ return false;
+ } else if (rold->range > rcur->range) {
return false;
+ }
/* id relations must be preserved */
if (!check_ids(rold->id, rcur->id, idmap))
return false;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 01fc2a93f3ef..4ca3cb993da2 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2126,6 +2126,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
#endif
init_waitqueue_head(&cgrp->offline_waitq);
+ init_waitqueue_head(&cgrp->dying_populated_waitq);
INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
}
@@ -6224,6 +6225,78 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
return 0;
};
+/**
+ * cgroup_drain_dying - wait for dying tasks to leave before rmdir
+ * @cgrp: the cgroup being removed
+ *
+ * cgroup.procs and cgroup.threads use css_task_iter which filters out
+ * PF_EXITING tasks so that userspace doesn't see tasks that have already been
+ * reaped via waitpid(). However, cgroup_has_tasks() - which tests whether the
+ * cgroup has non-empty css_sets - is only updated when dying tasks pass through
+ * cgroup_task_dead() in finish_task_switch(). This creates a window where
+ * cgroup.procs reads empty but cgroup_has_tasks() is still true, making rmdir
+ * fail with -EBUSY from cgroup_destroy_locked() even though userspace sees no
+ * tasks.
+ *
+ * This function aligns cgroup_has_tasks() with what userspace can observe. If
+ * cgroup_has_tasks() but the task iterator sees nothing (all remaining tasks are
+ * PF_EXITING), we wait for cgroup_task_dead() to finish processing them. As the
+ * window between PF_EXITING and cgroup_task_dead() is short, the wait is brief.
+ *
+ * This function only concerns itself with this cgroup's own dying tasks.
+ * Whether the cgroup has children is cgroup_destroy_locked()'s problem.
+ *
+ * Each cgroup_task_dead() kicks the waitqueue via cset->cgrp_links, and we
+ * retry the full check from scratch.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+static int cgroup_drain_dying(struct cgroup *cgrp)
+ __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
+{
+ struct css_task_iter it;
+ struct task_struct *task;
+ DEFINE_WAIT(wait);
+
+ lockdep_assert_held(&cgroup_mutex);
+retry:
+ if (!cgroup_has_tasks(cgrp))
+ return 0;
+
+ /* Same iterator as cgroup.threads - if any task is visible, it's busy */
+ css_task_iter_start(&cgrp->self, 0, &it);
+ task = css_task_iter_next(&it);
+ css_task_iter_end(&it);
+
+ if (task)
+ return -EBUSY;
+
+ /*
+ * All remaining tasks are PF_EXITING and will pass through
+ * cgroup_task_dead() shortly. Wait for a kick and retry.
+ *
+ * cgroup_has_tasks() can't transition from false to true while we're
+ * holding cgroup_mutex, but the true to false transition happens
+ * under css_set_lock (via cgroup_task_dead()). We must retest and
+ * prepare_to_wait() under css_set_lock. Otherwise, the transition
+ * can happen between our first test and prepare_to_wait(), and we
+ * sleep with no one to wake us.
+ */
+ spin_lock_irq(&css_set_lock);
+ if (!cgroup_has_tasks(cgrp)) {
+ spin_unlock_irq(&css_set_lock);
+ return 0;
+ }
+ prepare_to_wait(&cgrp->dying_populated_waitq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
+ schedule();
+ finish_wait(&cgrp->dying_populated_waitq, &wait);
+ mutex_lock(&cgroup_mutex);
+ goto retry;
+}
+
int cgroup_rmdir(struct kernfs_node *kn)
{
struct cgroup *cgrp;
@@ -6233,9 +6306,12 @@ int cgroup_rmdir(struct kernfs_node *kn)
if (!cgrp)
return 0;
- ret = cgroup_destroy_locked(cgrp);
- if (!ret)
- TRACE_CGROUP_PATH(rmdir, cgrp);
+ ret = cgroup_drain_dying(cgrp);
+ if (!ret) {
+ ret = cgroup_destroy_locked(cgrp);
+ if (!ret)
+ TRACE_CGROUP_PATH(rmdir, cgrp);
+ }
cgroup_kn_unlock(kn);
return ret;
@@ -6995,6 +7071,7 @@ void cgroup_task_exit(struct task_struct *tsk)
static void do_cgroup_task_dead(struct task_struct *tsk)
{
+ struct cgrp_cset_link *link;
struct css_set *cset;
unsigned long flags;
@@ -7008,6 +7085,11 @@ static void do_cgroup_task_dead(struct task_struct *tsk)
if (thread_group_leader(tsk) && atomic_read(&tsk->signal->live))
list_add_tail(&tsk->cg_list, &cset->dying_tasks);
+ /* kick cgroup_drain_dying() waiters, see cgroup_rmdir() */
+ list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
+ if (waitqueue_active(&link->cgrp->dying_populated_waitq))
+ wake_up(&link->cgrp->dying_populated_waitq);
+
if (dl_task(tsk))
dec_dl_tasks_cs(tsk);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index d21868455341..1335e437098e 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2988,7 +2988,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
struct cgroup_subsys_state *css;
struct cpuset *cs, *oldcs;
struct task_struct *task;
- bool cpus_updated, mems_updated;
+ bool setsched_check;
int ret;
/* used later by cpuset_attach() */
@@ -3003,20 +3003,31 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
if (ret)
goto out_unlock;
- cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus);
- mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems);
+ /*
+ * Skip rights over task setsched check in v2 when nothing changes,
+ * migration permission derives from hierarchy ownership in
+ * cgroup_procs_write_permission()).
+ */
+ setsched_check = !cpuset_v2() ||
+ !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus) ||
+ !nodes_equal(cs->effective_mems, oldcs->effective_mems);
+
+ /*
+ * A v1 cpuset with tasks will have no CPU left only when CPU hotplug
+ * brings the last online CPU offline as users are not allowed to empty
+ * cpuset.cpus when there are active tasks inside. When that happens,
+ * we should allow tasks to migrate out without security check to make
+ * sure they will be able to run after migration.
+ */
+ if (!is_in_v2_mode() && cpumask_empty(oldcs->effective_cpus))
+ setsched_check = false;
cgroup_taskset_for_each(task, css, tset) {
ret = task_can_attach(task);
if (ret)
goto out_unlock;
- /*
- * Skip rights over task check in v2 when nothing changes,
- * migration permission derives from hierarchy ownership in
- * cgroup_procs_write_permission()).
- */
- if (!cpuset_v2() || (cpus_updated || mems_updated)) {
+ if (setsched_check) {
ret = security_task_setscheduler(task);
if (ret)
goto out_unlock;
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 86f87e43438c..0677918f06a8 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -453,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
return overlap;
}
-static void active_cacheline_inc_overlap(phys_addr_t cln)
+static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean)
{
int overlap = active_cacheline_read_overlap(cln);
@@ -462,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
/* If we overflowed the overlap counter then we're potentially
* leaking dma-mappings.
*/
- WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+ WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"),
ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
}
@@ -495,7 +495,7 @@ static int active_cacheline_insert(struct dma_debug_entry *entry,
if (rc == -EEXIST) {
struct dma_debug_entry *existing;
- active_cacheline_inc_overlap(cln);
+ active_cacheline_inc_overlap(cln, entry->is_cache_clean);
existing = radix_tree_lookup(&dma_active_cacheline, cln);
/* A lookup failure here after we got -EEXIST is unexpected. */
WARN_ON(!existing);
@@ -601,7 +601,8 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
unsigned long flags;
int rc;
- entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN);
+ entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES |
+ DMA_ATTR_REQUIRE_COHERENT);
bucket = get_hash_bucket(entry, &flags);
hash_bucket_add(bucket, entry);
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index e89f175e9c2d..6184ff303f08 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -84,7 +84,7 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
dma_addr_t dma_addr;
if (is_swiotlb_force_bounce(dev)) {
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
return DMA_MAPPING_ERROR;
return swiotlb_map(dev, phys, size, dir, attrs);
@@ -98,7 +98,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
dma_addr = phys_to_dma(dev, phys);
if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
dma_kmalloc_needs_bounce(dev, size, dir)) {
- if (is_swiotlb_active(dev))
+ if (is_swiotlb_active(dev) &&
+ !(attrs & DMA_ATTR_REQUIRE_COHERENT))
return swiotlb_map(dev, phys, size, dir, attrs);
goto err_overflow;
@@ -123,7 +124,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
{
phys_addr_t phys;
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
/* nothing to do: uncached and no swiotlb */
return;
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 3928a509c44c..6d3dd0bd3a88 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -164,6 +164,9 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
if (WARN_ON_ONCE(!dev->dma_mask))
return DMA_MAPPING_ERROR;
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return DMA_MAPPING_ERROR;
+
if (dma_map_direct(dev, ops) ||
(!is_mmio && arch_dma_map_phys_direct(dev, phys + size)))
addr = dma_direct_map_phys(dev, phys, size, dir, attrs);
@@ -235,6 +238,9 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
BUG_ON(!valid_dma_direction(dir));
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return -EOPNOTSUPP;
+
if (WARN_ON_ONCE(!dev->dma_mask))
return 0;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..9fd73700ddcf 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -30,6 +30,7 @@
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/io.h>
+#include <linux/kmsan-checks.h>
#include <linux/iommu-helper.h>
#include <linux/init.h>
#include <linux/memblock.h>
@@ -901,10 +902,19 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
local_irq_save(flags);
page = pfn_to_page(pfn);
- if (dir == DMA_TO_DEVICE)
+ if (dir == DMA_TO_DEVICE) {
+ /*
+ * Ideally, kmsan_check_highmem_page()
+ * could be used here to detect infoleaks,
+ * but callers may map uninitialized buffers
+ * that will be written by the device,
+ * causing false positives.
+ */
memcpy_from_page(vaddr, page, offset, sz);
- else
+ } else {
+ kmsan_unpoison_memory(vaddr, sz);
memcpy_to_page(page, offset, vaddr, sz);
+ }
local_irq_restore(flags);
size -= sz;
@@ -913,8 +923,15 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
offset = 0;
}
} else if (dir == DMA_TO_DEVICE) {
+ /*
+ * Ideally, kmsan_check_memory() could be used here to detect
+ * infoleaks (uninitialized data being sent to device), but
+ * callers may map uninitialized buffers that will be written
+ * by the device, causing false positives.
+ */
memcpy(vaddr, phys_to_virt(orig_addr), size);
} else {
+ kmsan_unpoison_memory(vaddr, size);
memcpy(phys_to_virt(orig_addr), vaddr, size);
}
}
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index cf7e610eac42..31e83a09789e 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
if (!vma)
return FUTEX_NO_NODE;
- mpol = vma_policy(vma);
+ mpol = READ_ONCE(vma->vm_policy);
if (!mpol)
return FUTEX_NO_NODE;
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index bc1f7e83a37e..7808068fa59e 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -918,7 +918,7 @@ int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked)
int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to;
- struct task_struct *exiting = NULL;
+ struct task_struct *exiting;
struct rt_mutex_waiter rt_waiter;
struct futex_q q = futex_q_init;
DEFINE_WAKE_Q(wake_q);
@@ -933,6 +933,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
to = futex_setup_timer(time, &timeout, flags, 0);
retry:
+ exiting = NULL;
ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE);
if (unlikely(ret != 0))
goto out;
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 743c7a728237..77ad9691f6a6 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -459,6 +459,14 @@ SYSCALL_DEFINE4(futex_requeue,
if (ret)
return ret;
+ /*
+ * For now mandate both flags are identical, like the sys_futex()
+ * interface has. If/when we merge the variable sized futex support,
+ * that patch can modify this test to allow a difference in size.
+ */
+ if (futexes[0].w.flags != futexes[1].w.flags)
+ return -EINVAL;
+
cmpval = futexes[0].w.val;
return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags,
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 5f8c9e12eaec..5429e9f19b65 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -40,7 +40,7 @@ void pm_restore_gfp_mask(void)
{
WARN_ON(!mutex_is_locked(&system_transition_mutex));
- if (WARN_ON(!saved_gfp_count) || --saved_gfp_count)
+ if (!saved_gfp_count || --saved_gfp_count)
return;
gfp_allowed_mask = saved_gfp_mask;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 6e1321837c66..a564650734dc 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -2855,6 +2855,17 @@ int snapshot_write_finalize(struct snapshot_handle *handle)
{
int error;
+ /*
+ * Call snapshot_write_next() to drain any trailing zero pages,
+ * but make sure we're in the data page region first.
+ * This function can return PAGE_SIZE if the kernel was expecting
+ * another copy page. Return -ENODATA in that situation.
+ */
+ if (handle->cur > nr_meta_pages + 1) {
+ error = snapshot_write_next(handle);
+ if (error)
+ return error > 0 ? -ENODATA : error;
+ }
copy_last_highmem_page();
error = hibernate_restore_protect_page(handle->buffer);
/* Do that only if we have loaded the image entirely */
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index dc5d614b372c..9b10b57b79ad 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -502,6 +502,15 @@ do { \
___locked; \
})
+#define raw_spin_trylock_irqsave_rcu_node(p, flags) \
+({ \
+ bool ___locked = raw_spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
+ \
+ if (___locked) \
+ smp_mb__after_unlock_lock(); \
+ ___locked; \
+})
+
#define raw_lockdep_assert_held_rcu_node(p) \
lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 3450c3751ef7..a2e2d516e51b 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -9,6 +9,7 @@
*/
#include <linux/export.h>
+#include <linux/irq_work.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/rcupdate_wait.h>
@@ -41,6 +42,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp)
ssp->srcu_idx_max = 0;
INIT_WORK(&ssp->srcu_work, srcu_drive_gp);
INIT_LIST_HEAD(&ssp->srcu_work.entry);
+ init_irq_work(&ssp->srcu_irq_work, srcu_tiny_irq_work);
return 0;
}
@@ -84,6 +86,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
void cleanup_srcu_struct(struct srcu_struct *ssp)
{
WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
+ irq_work_sync(&ssp->srcu_irq_work);
flush_work(&ssp->srcu_work);
WARN_ON(ssp->srcu_gp_running);
WARN_ON(ssp->srcu_gp_waiting);
@@ -177,6 +180,20 @@ void srcu_drive_gp(struct work_struct *wp)
}
EXPORT_SYMBOL_GPL(srcu_drive_gp);
+/*
+ * Use an irq_work to defer schedule_work() to avoid acquiring the workqueue
+ * pool->lock while the caller might hold scheduler locks, causing lockdep
+ * splats due to workqueue_init() doing a wakeup.
+ */
+void srcu_tiny_irq_work(struct irq_work *irq_work)
+{
+ struct srcu_struct *ssp;
+
+ ssp = container_of(irq_work, struct srcu_struct, srcu_irq_work);
+ schedule_work(&ssp->srcu_work);
+}
+EXPORT_SYMBOL_GPL(srcu_tiny_irq_work);
+
static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
{
unsigned long cookie;
@@ -189,7 +206,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
WRITE_ONCE(ssp->srcu_idx_max, cookie);
if (!READ_ONCE(ssp->srcu_gp_running)) {
if (likely(srcu_init_done))
- schedule_work(&ssp->srcu_work);
+ irq_work_queue(&ssp->srcu_irq_work);
else if (list_empty(&ssp->srcu_work.entry))
list_add(&ssp->srcu_work.entry, &srcu_boot_list);
}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index aef8e91ad33e..0d01cd8c4b4a 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -19,6 +19,7 @@
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
+#include <linux/irq_work.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched.h>
#include <linux/smp.h>
@@ -75,44 +76,9 @@ static bool __read_mostly srcu_init_done;
static void srcu_invoke_callbacks(struct work_struct *work);
static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
static void process_srcu(struct work_struct *work);
+static void srcu_irq_work(struct irq_work *work);
static void srcu_delay_timer(struct timer_list *t);
-/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
-#define spin_lock_rcu_node(p) \
-do { \
- spin_lock(&ACCESS_PRIVATE(p, lock)); \
- smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
-
-#define spin_lock_irq_rcu_node(p) \
-do { \
- spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
- smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_unlock_irq_rcu_node(p) \
- spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
-
-#define spin_lock_irqsave_rcu_node(p, flags) \
-do { \
- spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
- smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_trylock_irqsave_rcu_node(p, flags) \
-({ \
- bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
- \
- if (___locked) \
- smp_mb__after_unlock_lock(); \
- ___locked; \
-})
-
-#define spin_unlock_irqrestore_rcu_node(p, flags) \
- spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
-
/*
* Initialize SRCU per-CPU data. Note that statically allocated
* srcu_struct structures might already have srcu_read_lock() and
@@ -131,7 +97,7 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
*/
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
- spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
+ raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
rcu_segcblist_init(&sdp->srcu_cblist);
sdp->srcu_cblist_invoking = false;
sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq;
@@ -186,7 +152,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
/* Each pass through this loop initializes one srcu_node structure. */
srcu_for_each_node_breadth_first(ssp, snp) {
- spin_lock_init(&ACCESS_PRIVATE(snp, lock));
+ raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) !=
ARRAY_SIZE(snp->srcu_data_have_cbs));
for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
@@ -242,7 +208,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
if (!ssp->srcu_sup)
return -ENOMEM;
if (!is_static)
- spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
+ raw_spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL;
ssp->srcu_sup->node = NULL;
mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
@@ -252,6 +218,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
+ init_irq_work(&ssp->srcu_sup->irq_work, srcu_irq_work);
ssp->srcu_sup->sda_is_static = is_static;
if (!is_static) {
ssp->sda = alloc_percpu(struct srcu_data);
@@ -263,9 +230,12 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL;
ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
- if (!init_srcu_struct_nodes(ssp, is_static ? GFP_ATOMIC : GFP_KERNEL))
+ if (!preemptible())
+ WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC);
+ else if (init_srcu_struct_nodes(ssp, GFP_KERNEL))
+ WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
+ else
goto err_free_sda;
- WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
}
ssp->srcu_sup->srcu_ssp = ssp;
smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed,
@@ -394,20 +364,20 @@ static void srcu_transition_to_big(struct srcu_struct *ssp)
/* Double-checked locking on ->srcu_size-state. */
if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL)
return;
- spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) {
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
return;
}
__srcu_transition_to_big(ssp);
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
* Check to see if the just-encountered contention event justifies
* a transition to SRCU_SIZE_BIG.
*/
-static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
+static void raw_spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
{
unsigned long j;
@@ -429,16 +399,16 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
* to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module
* parameter permits this.
*/
-static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
+static void raw_spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
{
struct srcu_struct *ssp = sdp->ssp;
- if (spin_trylock_irqsave_rcu_node(sdp, *flags))
+ if (raw_spin_trylock_irqsave_rcu_node(sdp, *flags))
return;
- spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
- spin_lock_irqsave_check_contention(ssp);
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
- spin_lock_irqsave_rcu_node(sdp, *flags);
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
+ raw_spin_lock_irqsave_check_contention(ssp);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
+ raw_spin_lock_irqsave_rcu_node(sdp, *flags);
}
/*
@@ -447,12 +417,12 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon
* to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module
* parameter permits this.
*/
-static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
+static void raw_spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
{
- if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
+ if (raw_spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
return;
- spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
- spin_lock_irqsave_check_contention(ssp);
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
+ raw_spin_lock_irqsave_check_contention(ssp);
}
/*
@@ -470,13 +440,13 @@ static void check_init_srcu_struct(struct srcu_struct *ssp)
/* The smp_load_acquire() pairs with the smp_store_release(). */
if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/
return; /* Already initialized. */
- spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
return;
}
init_srcu_struct_fields(ssp, true);
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
@@ -742,13 +712,15 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
unsigned long delay;
struct srcu_usage *sup = ssp->srcu_sup;
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
delay = srcu_get_delay(ssp);
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (WARN_ON(!delay))
return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(ssp)))
return; /* Just leak it! */
+ /* Wait for irq_work to finish first as it may queue a new work. */
+ irq_work_sync(&sup->irq_work);
flush_delayed_work(&sup->work);
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
@@ -960,7 +932,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
mutex_lock(&sup->srcu_cb_mutex);
/* End the current grace period. */
- spin_lock_irq_rcu_node(sup);
+ raw_spin_lock_irq_rcu_node(sup);
idx = rcu_seq_state(sup->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
if (srcu_gp_is_expedited(ssp))
@@ -971,7 +943,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
gpseq = rcu_seq_current(&sup->srcu_gp_seq);
if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq))
WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq);
- spin_unlock_irq_rcu_node(sup);
+ raw_spin_unlock_irq_rcu_node(sup);
mutex_unlock(&sup->srcu_gp_mutex);
/* A new grace period can start at this point. But only one. */
@@ -983,7 +955,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
} else {
idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
srcu_for_each_node_breadth_first(ssp, snp) {
- spin_lock_irq_rcu_node(snp);
+ raw_spin_lock_irq_rcu_node(snp);
cbs = false;
last_lvl = snp >= sup->level[rcu_num_lvls - 1];
if (last_lvl)
@@ -998,7 +970,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
else
mask = snp->srcu_data_have_cbs[idx];
snp->srcu_data_have_cbs[idx] = 0;
- spin_unlock_irq_rcu_node(snp);
+ raw_spin_unlock_irq_rcu_node(snp);
if (cbs)
srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
}
@@ -1008,27 +980,27 @@ static void srcu_gp_end(struct srcu_struct *ssp)
if (!(gpseq & counter_wrap_check))
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
- spin_lock_irq_rcu_node(sdp);
+ raw_spin_lock_irq_rcu_node(sdp);
if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
sdp->srcu_gp_seq_needed = gpseq;
if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
sdp->srcu_gp_seq_needed_exp = gpseq;
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
}
/* Callback initiation done, allow grace periods after next. */
mutex_unlock(&sup->srcu_cb_mutex);
/* Start a new grace period if needed. */
- spin_lock_irq_rcu_node(sup);
+ raw_spin_lock_irq_rcu_node(sup);
gpseq = rcu_seq_current(&sup->srcu_gp_seq);
if (!rcu_seq_state(gpseq) &&
ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) {
srcu_gp_start(ssp);
- spin_unlock_irq_rcu_node(sup);
+ raw_spin_unlock_irq_rcu_node(sup);
srcu_reschedule(ssp, 0);
} else {
- spin_unlock_irq_rcu_node(sup);
+ raw_spin_unlock_irq_rcu_node(sup);
}
/* Transition to big if needed. */
@@ -1059,19 +1031,19 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) ||
(!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
return;
- spin_lock_irqsave_rcu_node(snp, flags);
+ raw_spin_lock_irqsave_rcu_node(snp, flags);
sgsne = snp->srcu_gp_seq_needed_exp;
if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) {
- spin_unlock_irqrestore_rcu_node(snp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(snp, flags);
return;
}
WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(snp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(snp, flags);
}
- spin_lock_irqsave_ssp_contention(ssp, &flags);
+ raw_spin_lock_irqsave_ssp_contention(ssp, &flags);
if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
@@ -1109,12 +1081,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf)
return; /* GP already done and CBs recorded. */
- spin_lock_irqsave_rcu_node(snp, flags);
+ raw_spin_lock_irqsave_rcu_node(snp, flags);
snp_seq = snp->srcu_have_cbs[idx];
if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) {
if (snp == snp_leaf && snp_seq == s)
snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
- spin_unlock_irqrestore_rcu_node(snp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(snp, flags);
if (snp == snp_leaf && snp_seq != s) {
srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0);
return;
@@ -1129,11 +1101,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
sgsne = snp->srcu_gp_seq_needed_exp;
if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s)))
WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(snp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(snp, flags);
}
/* Top of tree, must ensure the grace period will be started. */
- spin_lock_irqsave_ssp_contention(ssp, &flags);
+ raw_spin_lock_irqsave_ssp_contention(ssp, &flags);
if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) {
/*
* Record need for grace period s. Pair with load
@@ -1154,13 +1126,17 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
// it isn't. And it does not have to be. After all, it
// can only be executed during early boot when there is only
// the one boot CPU running with interrupts still disabled.
+ //
+ // Use an irq_work here to avoid acquiring runqueue lock with
+ // srcu rcu_node::lock held. BPF instrument could introduce the
+ // opposite dependency, hence we need to break the possible
+ // locking dependency here.
if (likely(srcu_init_done))
- queue_delayed_work(rcu_gp_wq, &sup->work,
- !!srcu_get_delay(ssp));
+ irq_work_queue(&sup->irq_work);
else if (list_empty(&sup->work.work.entry))
list_add(&sup->work.work.entry, &srcu_boot_list);
}
- spin_unlock_irqrestore_rcu_node(sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sup, flags);
}
/*
@@ -1172,9 +1148,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
unsigned long curdelay;
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
curdelay = !srcu_get_delay(ssp);
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
for (;;) {
if (srcu_readers_active_idx_check(ssp, idx))
@@ -1285,12 +1261,12 @@ static bool srcu_should_expedite(struct srcu_struct *ssp)
return false;
/* If the local srcu_data structure has callbacks, not idle. */
sdp = raw_cpu_ptr(ssp->sda);
- spin_lock_irqsave_rcu_node(sdp, flags);
+ raw_spin_lock_irqsave_rcu_node(sdp, flags);
if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
return false; /* Callbacks already present, so not idle. */
}
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
/*
* No local callbacks, so probabilistically probe global state.
@@ -1350,7 +1326,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = raw_cpu_ptr(ssp->sda);
- spin_lock_irqsave_sdp_contention(sdp, &flags);
+ raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
if (rhp)
rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
/*
@@ -1410,7 +1386,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
sdp->srcu_gp_seq_needed_exp = s;
needexp = true;
}
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
/* Ensure that snp node tree is fully initialized before traversing it */
if (ss_state < SRCU_SIZE_WAIT_BARRIER)
@@ -1522,7 +1498,7 @@ static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm)
/*
* Make sure that later code is ordered after the SRCU grace
- * period. This pairs with the spin_lock_irq_rcu_node()
+ * period. This pairs with the raw_spin_lock_irq_rcu_node()
* in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed
* because the current CPU might have been totally uninvolved with
* (and thus unordered against) that grace period.
@@ -1701,7 +1677,7 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
*/
static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
{
- spin_lock_irq_rcu_node(sdp);
+ raw_spin_lock_irq_rcu_node(sdp);
atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
sdp->srcu_barrier_head.func = srcu_barrier_cb;
debug_rcu_head_queue(&sdp->srcu_barrier_head);
@@ -1710,7 +1686,7 @@ static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
}
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
}
/**
@@ -1761,7 +1737,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp)
bool needcb = false;
struct srcu_data *sdp = container_of(rhp, struct srcu_data, srcu_ec_head);
- spin_lock_irqsave_sdp_contention(sdp, &flags);
+ raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
if (sdp->srcu_ec_state == SRCU_EC_IDLE) {
WARN_ON_ONCE(1);
} else if (sdp->srcu_ec_state == SRCU_EC_PENDING) {
@@ -1771,7 +1747,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp)
sdp->srcu_ec_state = SRCU_EC_PENDING;
needcb = true;
}
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
// If needed, requeue ourselves as an expedited SRCU callback.
if (needcb)
__call_srcu(sdp->ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false);
@@ -1795,7 +1771,7 @@ void srcu_expedite_current(struct srcu_struct *ssp)
migrate_disable();
sdp = this_cpu_ptr(ssp->sda);
- spin_lock_irqsave_sdp_contention(sdp, &flags);
+ raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
if (sdp->srcu_ec_state == SRCU_EC_IDLE) {
sdp->srcu_ec_state = SRCU_EC_PENDING;
needcb = true;
@@ -1804,7 +1780,7 @@ void srcu_expedite_current(struct srcu_struct *ssp)
} else {
WARN_ON_ONCE(sdp->srcu_ec_state != SRCU_EC_REPOST);
}
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
// If needed, queue an expedited SRCU callback.
if (needcb)
__call_srcu(ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false);
@@ -1848,17 +1824,17 @@ static void srcu_advance_state(struct srcu_struct *ssp)
*/
idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */
if (idx == SRCU_STATE_IDLE) {
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq));
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return;
}
idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq));
if (idx == SRCU_STATE_IDLE)
srcu_gp_start(ssp);
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (idx != SRCU_STATE_IDLE) {
mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* Someone else started the grace period. */
@@ -1872,10 +1848,10 @@ static void srcu_advance_state(struct srcu_struct *ssp)
return; /* readers present, retry later. */
}
srcu_flip(ssp);
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
ssp->srcu_sup->srcu_n_exp_nodelay = 0;
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
}
if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
@@ -1913,7 +1889,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
ssp = sdp->ssp;
rcu_cblist_init(&ready_cbs);
- spin_lock_irq_rcu_node(sdp);
+ raw_spin_lock_irq_rcu_node(sdp);
WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL));
rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
@@ -1924,7 +1900,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
*/
if (sdp->srcu_cblist_invoking ||
!rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
return; /* Someone else on the job or nothing to do. */
}
@@ -1932,7 +1908,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
sdp->srcu_cblist_invoking = true;
rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
len = ready_cbs.len;
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
rhp = rcu_cblist_dequeue(&ready_cbs);
for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
debug_rcu_head_unqueue(rhp);
@@ -1947,11 +1923,11 @@ static void srcu_invoke_callbacks(struct work_struct *work)
* Update counts, accelerate new callbacks, and if needed,
* schedule another round of callback invocation.
*/
- spin_lock_irq_rcu_node(sdp);
+ raw_spin_lock_irq_rcu_node(sdp);
rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
sdp->srcu_cblist_invoking = false;
more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
/* An SRCU barrier or callbacks from previous nesting work pending */
if (more)
srcu_schedule_cbs_sdp(sdp, 0);
@@ -1965,7 +1941,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
{
bool pushgp = true;
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) {
/* All requests fulfilled, time to go idle. */
@@ -1975,7 +1951,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
/* Outstanding request and no GP. Start one. */
srcu_gp_start(ssp);
}
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (pushgp)
queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay);
@@ -1995,9 +1971,9 @@ static void process_srcu(struct work_struct *work)
ssp = sup->srcu_ssp;
srcu_advance_state(ssp);
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
curdelay = srcu_get_delay(ssp);
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (curdelay) {
WRITE_ONCE(sup->reschedule_count, 0);
} else {
@@ -2015,6 +1991,23 @@ static void process_srcu(struct work_struct *work)
srcu_reschedule(ssp, curdelay);
}
+static void srcu_irq_work(struct irq_work *work)
+{
+ struct srcu_struct *ssp;
+ struct srcu_usage *sup;
+ unsigned long delay;
+ unsigned long flags;
+
+ sup = container_of(work, struct srcu_usage, irq_work);
+ ssp = sup->srcu_ssp;
+
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+ delay = srcu_get_delay(ssp);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+
+ queue_delayed_work(rcu_gp_wq, &sup->work, !!delay);
+}
+
void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags,
unsigned long *gp_seq)
{
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 26a6ac2f8826..d5bdcdb3f700 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2404,7 +2404,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
{
struct scx_sched *sch = scx_root;
- /* see kick_cpus_irq_workfn() */
+ /* see kick_sync_wait_bal_cb() */
smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
update_curr_scx(rq);
@@ -2447,6 +2447,48 @@ switch_class:
switch_class(rq, next);
}
+static void kick_sync_wait_bal_cb(struct rq *rq)
+{
+ struct scx_kick_syncs __rcu *ks = __this_cpu_read(scx_kick_syncs);
+ unsigned long *ksyncs = rcu_dereference_sched(ks)->syncs;
+ bool waited;
+ s32 cpu;
+
+ /*
+ * Drop rq lock and enable IRQs while waiting. IRQs must be enabled
+ * — a target CPU may be waiting for us to process an IPI (e.g. TLB
+ * flush) while we wait for its kick_sync to advance.
+ *
+ * Also, keep advancing our own kick_sync so that new kick_sync waits
+ * targeting us, which can start after we drop the lock, cannot form
+ * cyclic dependencies.
+ */
+retry:
+ waited = false;
+ for_each_cpu(cpu, rq->scx.cpus_to_sync) {
+ /*
+ * smp_load_acquire() pairs with smp_store_release() on
+ * kick_sync updates on the target CPUs.
+ */
+ if (cpu == cpu_of(rq) ||
+ smp_load_acquire(&cpu_rq(cpu)->scx.kick_sync) != ksyncs[cpu]) {
+ cpumask_clear_cpu(cpu, rq->scx.cpus_to_sync);
+ continue;
+ }
+
+ raw_spin_rq_unlock_irq(rq);
+ while (READ_ONCE(cpu_rq(cpu)->scx.kick_sync) == ksyncs[cpu]) {
+ smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
+ cpu_relax();
+ }
+ raw_spin_rq_lock_irq(rq);
+ waited = true;
+ }
+
+ if (waited)
+ goto retry;
+}
+
static struct task_struct *first_local_task(struct rq *rq)
{
return list_first_entry_or_null(&rq->scx.local_dsq.list,
@@ -2460,7 +2502,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
bool keep_prev;
struct task_struct *p;
- /* see kick_cpus_irq_workfn() */
+ /* see kick_sync_wait_bal_cb() */
smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
rq_modified_begin(rq, &ext_sched_class);
@@ -2471,6 +2513,17 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
maybe_queue_balance_callback(rq);
/*
+ * Defer to a balance callback which can drop rq lock and enable
+ * IRQs. Waiting directly in the pick path would deadlock against
+ * CPUs sending us IPIs (e.g. TLB flushes) while we wait for them.
+ */
+ if (unlikely(rq->scx.kick_sync_pending)) {
+ rq->scx.kick_sync_pending = false;
+ queue_balance_callback(rq, &rq->scx.kick_sync_bal_cb,
+ kick_sync_wait_bal_cb);
+ }
+
+ /*
* If any higher-priority sched class enqueued a runnable task on
* this rq during balance_one(), abort and return RETRY_TASK, so
* that the scheduler loop can restart.
@@ -4713,6 +4766,9 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
if (!cpumask_empty(rq->scx.cpus_to_wait))
dump_line(&ns, " cpus_to_wait : %*pb",
cpumask_pr_args(rq->scx.cpus_to_wait));
+ if (!cpumask_empty(rq->scx.cpus_to_sync))
+ dump_line(&ns, " cpus_to_sync : %*pb",
+ cpumask_pr_args(rq->scx.cpus_to_sync));
used = seq_buf_used(&ns);
if (SCX_HAS_OP(sch, dump_cpu)) {
@@ -5610,11 +5666,11 @@ static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *ksyncs)
if (cpumask_test_cpu(cpu, this_scx->cpus_to_wait)) {
if (cur_class == &ext_sched_class) {
+ cpumask_set_cpu(cpu, this_scx->cpus_to_sync);
ksyncs[cpu] = rq->scx.kick_sync;
should_wait = true;
- } else {
- cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
}
+ cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
}
resched_curr(rq);
@@ -5669,27 +5725,15 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle);
}
- if (!should_wait)
- return;
-
- for_each_cpu(cpu, this_scx->cpus_to_wait) {
- unsigned long *wait_kick_sync = &cpu_rq(cpu)->scx.kick_sync;
-
- /*
- * Busy-wait until the task running at the time of kicking is no
- * longer running. This can be used to implement e.g. core
- * scheduling.
- *
- * smp_cond_load_acquire() pairs with store_releases in
- * pick_task_scx() and put_prev_task_scx(). The former breaks
- * the wait if SCX's scheduling path is entered even if the same
- * task is picked subsequently. The latter is necessary to break
- * the wait when $cpu is taken by a higher sched class.
- */
- if (cpu != cpu_of(this_rq))
- smp_cond_load_acquire(wait_kick_sync, VAL != ksyncs[cpu]);
-
- cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
+ /*
+ * Can't wait in hardirq — kick_sync can't advance, deadlocking if
+ * CPUs wait for each other. Defer to kick_sync_wait_bal_cb().
+ */
+ if (should_wait) {
+ raw_spin_rq_lock(this_rq);
+ this_scx->kick_sync_pending = true;
+ resched_curr(this_rq);
+ raw_spin_rq_unlock(this_rq);
}
}
@@ -5794,6 +5838,7 @@ void __init init_sched_ext_class(void)
BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL, n));
BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n));
BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n));
+ BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_sync, GFP_KERNEL, n));
rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn);
rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index ba298ac3ce6c..0ae93cd64004 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -543,7 +543,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
* piled up on it even if there is an idle core elsewhere on
* the system.
*/
- waker_node = cpu_to_node(cpu);
+ waker_node = scx_cpu_node_if_enabled(cpu);
if (!(current->flags & PF_EXITING) &&
cpu_rq(cpu)->scx.local_dsq.nr == 0 &&
(!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 43bbf0693cca..1ef9ba480f51 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -805,9 +805,12 @@ struct scx_rq {
cpumask_var_t cpus_to_kick_if_idle;
cpumask_var_t cpus_to_preempt;
cpumask_var_t cpus_to_wait;
+ cpumask_var_t cpus_to_sync;
+ bool kick_sync_pending;
unsigned long kick_sync;
local_t reenq_local_deferred;
struct balance_callback deferred_bal_cb;
+ struct balance_callback kick_sync_bal_cb;
struct irq_work deferred_irq_work;
struct irq_work kick_cpus_irq_work;
struct scx_dispatch_q bypass_dsq;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9d3a666ffde1..c9efb17cc255 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1118,7 +1118,7 @@ int proc_do_large_bitmap(const struct ctl_table *table, int dir,
unsigned long bitmap_len = table->maxlen;
unsigned long *bitmap = *(unsigned long **) table->data;
unsigned long *tmp_bitmap = NULL;
- char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
+ char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c = 0;
if (!bitmap || !bitmap_len || !left || (*ppos && SYSCTL_KERN_TO_USER(dir))) {
*lenp = 0;
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 069d93bfb0c7..b64db405ba5c 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -540,7 +540,7 @@ static s64 alarm_timer_forward(struct k_itimer *timr, ktime_t now)
{
struct alarm *alarm = &timr->it.alarm.alarmtimer;
- return alarm_forward(alarm, timr->it_interval, now);
+ return alarm_forward(alarm, now, timr->it_interval);
}
/**
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0b040a417442..af7079aa0f36 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2752,6 +2752,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (!is_kprobe_multi(prog))
return -EINVAL;
+ /* kprobe_multi is not allowed to be sleepable. */
+ if (prog->sleepable)
+ return -EINVAL;
+
/* Writing to context is not allowed for kprobes. */
if (prog->aux->kprobe_write_ctx)
return -EINVAL;
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index d5230b759a2d..655db2e82513 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -22,6 +22,39 @@ static struct task_struct *trigger_kthread;
static struct llist_head trigger_data_free_list;
static DEFINE_MUTEX(trigger_data_kthread_mutex);
+static int trigger_kthread_fn(void *ignore);
+
+static void trigger_create_kthread_locked(void)
+{
+ lockdep_assert_held(&trigger_data_kthread_mutex);
+
+ if (!trigger_kthread) {
+ struct task_struct *kthread;
+
+ kthread = kthread_create(trigger_kthread_fn, NULL,
+ "trigger_data_free");
+ if (!IS_ERR(kthread))
+ WRITE_ONCE(trigger_kthread, kthread);
+ }
+}
+
+static void trigger_data_free_queued_locked(void)
+{
+ struct event_trigger_data *data, *tmp;
+ struct llist_node *llnodes;
+
+ lockdep_assert_held(&trigger_data_kthread_mutex);
+
+ llnodes = llist_del_all(&trigger_data_free_list);
+ if (!llnodes)
+ return;
+
+ tracepoint_synchronize_unregister();
+
+ llist_for_each_entry_safe(data, tmp, llnodes, llist)
+ kfree(data);
+}
+
/* Bulk garbage collection of event_trigger_data elements */
static int trigger_kthread_fn(void *ignore)
{
@@ -56,30 +89,50 @@ void trigger_data_free(struct event_trigger_data *data)
if (data->cmd_ops->set_filter)
data->cmd_ops->set_filter(NULL, data, NULL);
+ /*
+ * Boot-time trigger registration can fail before kthread creation
+ * works. Keep the deferred-free semantics during boot and let late
+ * init start the kthread to drain the list.
+ */
+ if (system_state == SYSTEM_BOOTING && !trigger_kthread) {
+ llist_add(&data->llist, &trigger_data_free_list);
+ return;
+ }
+
if (unlikely(!trigger_kthread)) {
guard(mutex)(&trigger_data_kthread_mutex);
+
+ trigger_create_kthread_locked();
/* Check again after taking mutex */
if (!trigger_kthread) {
- struct task_struct *kthread;
-
- kthread = kthread_create(trigger_kthread_fn, NULL,
- "trigger_data_free");
- if (!IS_ERR(kthread))
- WRITE_ONCE(trigger_kthread, kthread);
+ llist_add(&data->llist, &trigger_data_free_list);
+ /* Drain the queued frees synchronously if creation failed. */
+ trigger_data_free_queued_locked();
+ return;
}
}
- if (!trigger_kthread) {
- /* Do it the slow way */
- tracepoint_synchronize_unregister();
- kfree(data);
- return;
- }
-
llist_add(&data->llist, &trigger_data_free_list);
wake_up_process(trigger_kthread);
}
+static int __init trigger_data_free_init(void)
+{
+ guard(mutex)(&trigger_data_kthread_mutex);
+
+ if (llist_empty(&trigger_data_free_list))
+ return 0;
+
+ trigger_create_kthread_locked();
+ if (trigger_kthread)
+ wake_up_process(trigger_kthread);
+ else
+ trigger_data_free_queued_locked();
+
+ return 0;
+}
+late_initcall(trigger_data_free_init);
+
static inline void data_ops_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index dee610e465b9..be6cf0bb3c03 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2073,8 +2073,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
if (!osnoise_has_registered_instances())
return;
- guard(mutex)(&interface_lock);
guard(cpus_read_lock)();
+ guard(mutex)(&interface_lock);
if (!cpu_online(cpu))
return;
@@ -2237,11 +2237,11 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
if (running)
stop_per_cpu_kthreads();
- mutex_lock(&interface_lock);
/*
* avoid CPU hotplug operations that might read options.
*/
cpus_read_lock();
+ mutex_lock(&interface_lock);
retval = cnt;
@@ -2257,8 +2257,8 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
clear_bit(option, &osnoise_options);
}
- cpus_read_unlock();
mutex_unlock(&interface_lock);
+ cpus_read_unlock();
if (running)
start_per_cpu_kthreads();
@@ -2345,16 +2345,16 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
if (running)
stop_per_cpu_kthreads();
- mutex_lock(&interface_lock);
/*
* osnoise_cpumask is read by CPU hotplug operations.
*/
cpus_read_lock();
+ mutex_lock(&interface_lock);
cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
- cpus_read_unlock();
mutex_unlock(&interface_lock);
+ cpus_read_unlock();
if (running)
start_per_cpu_kthreads();
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b77119d71641..eda756556341 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -7699,8 +7699,29 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
else
ts = touched;
- /* did we stall? */
+ /*
+ * Did we stall?
+ *
+ * Do a lockless check first to do not disturb the system.
+ *
+ * Prevent false positives by double checking the timestamp
+ * under pool->lock. The lock makes sure that the check reads
+ * an updated pool->last_progress_ts when this CPU saw
+ * an already updated pool->worklist above. It seems better
+ * than adding another barrier into __queue_work() which
+ * is a hotter path.
+ */
if (time_after(now, ts + thresh)) {
+ scoped_guard(raw_spinlock_irqsave, &pool->lock) {
+ pool_ts = pool->last_progress_ts;
+ if (time_after(pool_ts, touched))
+ ts = pool_ts;
+ else
+ ts = touched;
+ }
+ if (!time_after(now, ts + thresh))
+ continue;
+
lockup_detected = true;
stall_time = jiffies_to_msecs(now - pool_ts) / 1000;
max_stall_time = max(max_stall_time, stall_time);
@@ -7712,8 +7733,6 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
pr_cont_pool_info(pool);
pr_cont(" stuck for %us!\n", stall_time);
}
-
-
}
if (lockup_detected)
diff --git a/lib/bug.c b/lib/bug.c
index 623c467a8b76..aab9e6a40c5f 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -173,10 +173,8 @@ struct bug_entry *find_bug(unsigned long bugaddr)
return module_find_bug(bugaddr);
}
-__diag_push();
-__diag_ignore(GCC, all, "-Wsuggest-attribute=format",
- "Not a valid __printf() conversion candidate.");
-static void __warn_printf(const char *fmt, struct pt_regs *regs)
+static __printf(1, 0)
+void __warn_printf(const char *fmt, struct pt_regs *regs)
{
if (!fmt)
return;
@@ -195,7 +193,6 @@ static void __warn_printf(const char *fmt, struct pt_regs *regs)
printk("%s", fmt);
}
-__diag_pop();
static enum bug_trap_type __report_bug(struct bug_entry *bug, unsigned long bugaddr, struct pt_regs *regs)
{
diff --git a/lib/crypto/chacha-block-generic.c b/lib/crypto/chacha-block-generic.c
index 77f68de71066..4a6d627580cb 100644
--- a/lib/crypto/chacha-block-generic.c
+++ b/lib/crypto/chacha-block-generic.c
@@ -87,6 +87,8 @@ void chacha_block_generic(struct chacha_state *state,
&out[i * sizeof(u32)]);
state->x[12]++;
+
+ chacha_zeroize_state(&permuted_state);
}
EXPORT_SYMBOL(chacha_block_generic);
@@ -110,5 +112,7 @@ void hchacha_block_generic(const struct chacha_state *state,
memcpy(&out[0], &permuted_state.x[0], 16);
memcpy(&out[4], &permuted_state.x[12], 16);
+
+ chacha_zeroize_state(&permuted_state);
}
EXPORT_SYMBOL(hchacha_block_generic);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index c1d1091d307e..3e1890d64d06 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1252,6 +1252,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
{
int err;
+ dst->maybe_corrupted = true;
if (!is_power_of_2(src->min_region_sz))
return -EINVAL;
@@ -1277,6 +1278,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
dst->addr_unit = src->addr_unit;
dst->min_region_sz = src->min_region_sz;
+ dst->maybe_corrupted = false;
return 0;
}
@@ -2678,6 +2680,8 @@ static void kdamond_call(struct damon_ctx *ctx, bool cancel)
complete(&control->completion);
else if (control->canceled && control->dealloc_on_cancel)
kfree(control);
+ if (!cancel && ctx->maybe_corrupted)
+ break;
}
mutex_lock(&ctx->call_controls_lock);
@@ -2707,6 +2711,8 @@ static int kdamond_wait_activation(struct damon_ctx *ctx)
kdamond_usleep(min_wait_time);
kdamond_call(ctx, false);
+ if (ctx->maybe_corrupted)
+ return -EINVAL;
damos_walk_cancel(ctx);
}
return -EBUSY;
@@ -2790,6 +2796,8 @@ static int kdamond_fn(void *data)
* kdamond_merge_regions() if possible, to reduce overhead
*/
kdamond_call(ctx, false);
+ if (ctx->maybe_corrupted)
+ break;
if (!list_empty(&ctx->schemes))
kdamond_apply_schemes(ctx);
else
diff --git a/mm/damon/stat.c b/mm/damon/stat.c
index 25fb44ccf99d..cf2c5a541eee 100644
--- a/mm/damon/stat.c
+++ b/mm/damon/stat.c
@@ -145,12 +145,59 @@ static int damon_stat_damon_call_fn(void *data)
return 0;
}
+struct damon_stat_system_ram_range_walk_arg {
+ bool walked;
+ struct resource res;
+};
+
+static int damon_stat_system_ram_walk_fn(struct resource *res, void *arg)
+{
+ struct damon_stat_system_ram_range_walk_arg *a = arg;
+
+ if (!a->walked) {
+ a->walked = true;
+ a->res.start = res->start;
+ }
+ a->res.end = res->end;
+ return 0;
+}
+
+static unsigned long damon_stat_res_to_core_addr(resource_size_t ra,
+ unsigned long addr_unit)
+{
+ /*
+ * Use div_u64() for avoiding linking errors related with __udivdi3,
+ * __aeabi_uldivmod, or similar problems. This should also improve the
+ * performance optimization (read div_u64() comment for the detail).
+ */
+ if (sizeof(ra) == 8 && sizeof(addr_unit) == 4)
+ return div_u64(ra, addr_unit);
+ return ra / addr_unit;
+}
+
+static int damon_stat_set_monitoring_region(struct damon_target *t,
+ unsigned long addr_unit, unsigned long min_region_sz)
+{
+ struct damon_addr_range addr_range;
+ struct damon_stat_system_ram_range_walk_arg arg = {};
+
+ walk_system_ram_res(0, -1, &arg, damon_stat_system_ram_walk_fn);
+ if (!arg.walked)
+ return -EINVAL;
+ addr_range.start = damon_stat_res_to_core_addr(
+ arg.res.start, addr_unit);
+ addr_range.end = damon_stat_res_to_core_addr(
+ arg.res.end + 1, addr_unit);
+ if (addr_range.end <= addr_range.start)
+ return -EINVAL;
+ return damon_set_regions(t, &addr_range, 1, min_region_sz);
+}
+
static struct damon_ctx *damon_stat_build_ctx(void)
{
struct damon_ctx *ctx;
struct damon_attrs attrs;
struct damon_target *target;
- unsigned long start = 0, end = 0;
ctx = damon_new_ctx();
if (!ctx)
@@ -180,8 +227,8 @@ static struct damon_ctx *damon_stat_build_ctx(void)
if (!target)
goto free_out;
damon_add_target(ctx, target);
- if (damon_set_region_biggest_system_ram_default(target, &start, &end,
- ctx->min_region_sz))
+ if (damon_stat_set_monitoring_region(target, ctx->addr_unit,
+ ctx->min_region_sz))
goto free_out;
return ctx;
free_out:
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 576d1ddd736b..6a44a2f3d8fc 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1524,8 +1524,10 @@ static int damon_sysfs_commit_input(void *data)
if (IS_ERR(param_ctx))
return PTR_ERR(param_ctx);
test_ctx = damon_sysfs_new_test_ctx(kdamond->damon_ctx);
- if (!test_ctx)
+ if (!test_ctx) {
+ damon_destroy_ctx(param_ctx);
return -ENOMEM;
+ }
err = damon_commit_ctx(test_ctx, param_ctx);
if (err)
goto out;
@@ -1618,9 +1620,12 @@ static int damon_sysfs_repeat_call_fn(void *data)
if (!mutex_trylock(&damon_sysfs_lock))
return 0;
+ if (sysfs_kdamond->contexts->nr != 1)
+ goto out;
damon_sysfs_upd_tuned_intervals(sysfs_kdamond);
damon_sysfs_upd_schemes_stats(sysfs_kdamond);
damon_sysfs_upd_schemes_effective_quotas(sysfs_kdamond);
+out:
mutex_unlock(&damon_sysfs_lock);
return 0;
}
@@ -1747,6 +1752,9 @@ static int damon_sysfs_update_schemes_tried_regions(
static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd,
struct damon_sysfs_kdamond *kdamond)
{
+ if (cmd != DAMON_SYSFS_CMD_OFF && kdamond->contexts->nr != 1)
+ return -EINVAL;
+
switch (cmd) {
case DAMON_SYSFS_CMD_ON:
return damon_sysfs_turn_damon_on(kdamond);
diff --git a/mm/hmm.c b/mm/hmm.c
index f6c4ddff4bd6..5955f2f0c83d 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -778,7 +778,7 @@ dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
struct page *page = hmm_pfn_to_page(pfns[idx]);
phys_addr_t paddr = hmm_pfn_to_phys(pfns[idx]);
size_t offset = idx * map->dma_entry_size;
- unsigned long attrs = 0;
+ unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT;
dma_addr_t dma_addr;
int ret;
@@ -871,7 +871,7 @@ bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx)
struct dma_iova_state *state = &map->state;
dma_addr_t *dma_addrs = map->dma_list;
unsigned long *pfns = map->pfn_list;
- unsigned long attrs = 0;
+ unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT;
if ((pfns[idx] & valid_dma) != valid_dma)
return false;
diff --git a/mm/memory.c b/mm/memory.c
index 2f815a34d924..c65e82c86fed 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6815,11 +6815,16 @@ retry:
pudp = pud_offset(p4dp, address);
pud = pudp_get(pudp);
- if (pud_none(pud))
+ if (!pud_present(pud))
goto out;
if (pud_leaf(pud)) {
lock = pud_lock(mm, pudp);
- if (!unlikely(pud_leaf(pud))) {
+ pud = pudp_get(pudp);
+
+ if (unlikely(!pud_present(pud))) {
+ spin_unlock(lock);
+ goto out;
+ } else if (unlikely(!pud_leaf(pud))) {
spin_unlock(lock);
goto retry;
}
@@ -6831,9 +6836,16 @@ retry:
pmdp = pmd_offset(pudp, address);
pmd = pmdp_get_lockless(pmdp);
+ if (!pmd_present(pmd))
+ goto out;
if (pmd_leaf(pmd)) {
lock = pmd_lock(mm, pmdp);
- if (!unlikely(pmd_leaf(pmd))) {
+ pmd = pmdp_get(pmdp);
+
+ if (unlikely(!pmd_present(pmd))) {
+ spin_unlock(lock);
+ goto out;
+ } else if (unlikely(!pmd_leaf(pmd))) {
spin_unlock(lock);
goto retry;
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0e5175f1c767..cf92bd6a8226 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -487,7 +487,13 @@ void __mpol_put(struct mempolicy *pol)
{
if (!atomic_dec_and_test(&pol->refcnt))
return;
- kmem_cache_free(policy_cache, pol);
+ /*
+ * Required to allow mmap_lock_speculative*() access, see for example
+ * futex_key_to_node_opt(). All accesses are serialized by mmap_lock,
+ * however the speculative lock section unbound by the normal lock
+ * boundaries, requiring RCU freeing.
+ */
+ kfree_rcu(pol, rcu);
}
EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
@@ -1020,7 +1026,7 @@ static int vma_replace_policy(struct vm_area_struct *vma,
}
old = vma->vm_policy;
- vma->vm_policy = new; /* protected by mmap_lock */
+ WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */
mpol_put(old);
return 0;
diff --git a/mm/mseal.c b/mm/mseal.c
index 316b5e1dec78..ac58643181f7 100644
--- a/mm/mseal.c
+++ b/mm/mseal.c
@@ -56,7 +56,6 @@ static int mseal_apply(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct vm_area_struct *vma, *prev;
- unsigned long curr_start = start;
VMA_ITERATOR(vmi, mm, start);
/* We know there are no gaps so this will be non-NULL. */
@@ -66,6 +65,7 @@ static int mseal_apply(struct mm_struct *mm,
prev = vma;
for_each_vma_range(vmi, vma, end) {
+ const unsigned long curr_start = MAX(vma->vm_start, start);
const unsigned long curr_end = MIN(vma->vm_end, end);
if (!(vma->vm_flags & VM_SEALED)) {
@@ -79,7 +79,6 @@ static int mseal_apply(struct mm_struct *mm,
}
prev = vma;
- curr_start = curr_end;
}
return 0;
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index a94c401ab2cf..4e7bcd975c54 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -97,6 +97,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
+ pud_t pudval = pudp_get(pud);
pmd_t *pmd;
unsigned long next;
const struct mm_walk_ops *ops = walk->ops;
@@ -105,6 +106,24 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
int err = 0;
int depth = real_depth(3);
+ /*
+ * For PTE handling, pte_offset_map_lock() takes care of checking
+ * whether there actually is a page table. But it also has to be
+ * very careful about concurrent page table reclaim.
+ *
+ * Similarly, we have to be careful here - a PUD entry that points
+ * to a PMD table cannot go away, so we can just walk it. But if
+ * it's something else, we need to ensure we didn't race something,
+ * so need to retry.
+ *
+ * A pertinent example of this is a PUD refault after PUD split -
+ * we will need to split again or risk accessing invalid memory.
+ */
+ if (!pud_present(pudval) || pud_leaf(pudval)) {
+ walk->action = ACTION_AGAIN;
+ return 0;
+ }
+
pmd = pmd_offset(pud, addr);
do {
again:
@@ -218,12 +237,12 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
else if (pud_leaf(*pud) || !pud_present(*pud))
continue; /* Nothing to do. */
- if (pud_none(*pud))
- goto again;
-
err = walk_pmd_range(pud, addr, next, walk);
if (err)
break;
+
+ if (walk->action == ACTION_AGAIN)
+ goto again;
} while (pud++, addr = next, addr != end);
return err;
diff --git a/mm/rmap.c b/mm/rmap.c
index 391337282e3f..8f08090d7eb9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -457,6 +457,13 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma)
list_del(&avc->same_vma);
anon_vma_chain_free(avc);
}
+
+ /*
+ * The anon_vma assigned to this VMA is no longer valid, as we were not
+ * able to correctly clone AVC state. Avoid inconsistent anon_vma tree
+ * state by resetting.
+ */
+ vma->anon_vma = NULL;
}
/**
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 6d0eef7470be..48aff2c917c0 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -494,6 +494,10 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
__folio_set_locked(folio);
__folio_set_swapbacked(folio);
+
+ if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry))
+ goto failed;
+
for (;;) {
ret = swap_cache_add_folio(folio, entry, &shadow);
if (!ret)
@@ -514,11 +518,6 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
goto failed;
}
- if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) {
- swap_cache_del_folio(folio);
- goto failed;
- }
-
memcg1_swapin(entry, folio_nr_pages(folio));
if (shadow)
workingset_refault(folio, shadow);
diff --git a/mm/zswap.c b/mm/zswap.c
index e6ec3295bdb0..16b2ef7223e1 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -942,9 +942,15 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
/* zswap entries of length PAGE_SIZE are not compressed. */
if (entry->length == PAGE_SIZE) {
+ void *dst;
+
WARN_ON_ONCE(input->length != PAGE_SIZE);
- memcpy_from_sglist(kmap_local_folio(folio, 0), input, 0, PAGE_SIZE);
+
+ dst = kmap_local_folio(folio, 0);
+ memcpy_from_sglist(dst, input, 0, PAGE_SIZE);
dlen = PAGE_SIZE;
+ kunmap_local(dst);
+ flush_dcache_folio(folio);
} else {
sg_init_table(&output, 1);
sg_set_folio(&output, folio, PAGE_SIZE, 0);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6eb59e9f2aa8..11d3ad8d2551 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1843,9 +1843,13 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data)
u8 aux_num_cis = 0;
u8 cis_id;
+ hci_dev_lock(hdev);
+
conn = hci_conn_hash_lookup_cig(hdev, cig_id);
- if (!conn)
+ if (!conn) {
+ hci_dev_unlock(hdev);
return 0;
+ }
qos = &conn->iso_qos;
pdu->cig_id = cig_id;
@@ -1884,6 +1888,8 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data)
}
pdu->num_cis = aux_num_cis;
+ hci_dev_unlock(hdev);
+
if (!pdu->num_cis)
return 0;
@@ -3095,7 +3101,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
* hci_connect_le serializes the connection attempts so only one
* connection can be in BT_CONNECT at time.
*/
- if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) {
+ if (conn->state == BT_CONNECT && READ_ONCE(hdev->req_status) == HCI_REQ_PEND) {
switch (hci_skb_event(hdev->sent_cmd)) {
case HCI_EV_CONN_COMPLETE:
case HCI_EV_LE_CONN_COMPLETE:
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 31308c1de4ec..01f8ceeb1c0c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -4126,7 +4126,7 @@ static int hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(skb);
}
- if (hdev->req_status == HCI_REQ_PEND &&
+ if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND &&
!hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) {
kfree_skb(hdev->req_skb);
hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 286529d2e554..3ebc5e6d45d9 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -80,6 +80,10 @@ static void *hci_le_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
return data;
}
+static void hci_store_wake_reason(struct hci_dev *hdev,
+ const bdaddr_t *bdaddr, u8 addr_type)
+ __must_hold(&hdev->lock);
+
static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3111,6 +3115,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
+ hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR);
/* Check for existing connection:
*
@@ -3274,6 +3279,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type);
+ hci_dev_lock(hdev);
+ hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR);
+ hci_dev_unlock(hdev);
+
/* Reject incoming connection from device with same BD ADDR against
* CVE-2020-26555
*/
@@ -5021,6 +5030,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
+ hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR);
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
if (!conn) {
@@ -5713,6 +5723,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
int err;
hci_dev_lock(hdev);
+ hci_store_wake_reason(hdev, bdaddr, bdaddr_type);
/* All controllers implicitly stop advertising in the event of a
* connection, so ensure that the state bit is cleared.
@@ -6005,6 +6016,7 @@ static void hci_le_past_received_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
hci_dev_lock(hdev);
+ hci_store_wake_reason(hdev, &ev->bdaddr, ev->bdaddr_type);
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
@@ -6403,6 +6415,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
info->length + 1))
break;
+ hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type);
+
if (info->length <= max_adv_len(hdev)) {
rssi = info->data[info->length];
process_adv_report(hdev, info->type, &info->bdaddr,
@@ -6491,6 +6505,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
info->length))
break;
+ hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type);
+
evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK;
legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type);
@@ -6536,6 +6552,7 @@ static void hci_le_pa_sync_established_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
hci_dev_lock(hdev);
+ hci_store_wake_reason(hdev, &ev->bdaddr, ev->bdaddr_type);
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
@@ -6767,25 +6784,31 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
latency = le16_to_cpu(ev->latency);
timeout = le16_to_cpu(ev->timeout);
+ hci_dev_lock(hdev);
+
hcon = hci_conn_hash_lookup_handle(hdev, handle);
- if (!hcon || hcon->state != BT_CONNECTED)
- return send_conn_param_neg_reply(hdev, handle,
- HCI_ERROR_UNKNOWN_CONN_ID);
+ if (!hcon || hcon->state != BT_CONNECTED) {
+ send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_UNKNOWN_CONN_ID);
+ goto unlock;
+ }
- if (max > hcon->le_conn_max_interval)
- return send_conn_param_neg_reply(hdev, handle,
- HCI_ERROR_INVALID_LL_PARAMS);
+ if (max > hcon->le_conn_max_interval) {
+ send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+ goto unlock;
+ }
- if (hci_check_conn_params(min, max, latency, timeout))
- return send_conn_param_neg_reply(hdev, handle,
- HCI_ERROR_INVALID_LL_PARAMS);
+ if (hci_check_conn_params(min, max, latency, timeout)) {
+ send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+ goto unlock;
+ }
if (hcon->role == HCI_ROLE_MASTER) {
struct hci_conn_params *params;
u8 store_hint;
- hci_dev_lock(hdev);
-
params = hci_conn_params_lookup(hdev, &hcon->dst,
hcon->dst_type);
if (params) {
@@ -6798,8 +6821,6 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
store_hint = 0x00;
}
- hci_dev_unlock(hdev);
-
mgmt_new_conn_param(hdev, &hcon->dst, hcon->dst_type,
store_hint, min, max, latency, timeout);
}
@@ -6813,6 +6834,9 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
cp.max_ce_len = 0;
hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_REPLY, sizeof(cp), &cp);
+
+unlock:
+ hci_dev_unlock(hdev);
}
static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data,
@@ -6834,6 +6858,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data,
for (i = 0; i < ev->num; i++) {
struct hci_ev_le_direct_adv_info *info = &ev->info[i];
+ hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type);
+
process_adv_report(hdev, info->type, &info->bdaddr,
info->bdaddr_type, &info->direct_addr,
info->direct_addr_type, HCI_ADV_PHY_1M, 0,
@@ -7517,73 +7543,29 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
return true;
}
-static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
- struct sk_buff *skb)
+static void hci_store_wake_reason(struct hci_dev *hdev,
+ const bdaddr_t *bdaddr, u8 addr_type)
+ __must_hold(&hdev->lock)
{
- struct hci_ev_le_advertising_info *adv;
- struct hci_ev_le_direct_adv_info *direct_adv;
- struct hci_ev_le_ext_adv_info *ext_adv;
- const struct hci_ev_conn_complete *conn_complete = (void *)skb->data;
- const struct hci_ev_conn_request *conn_request = (void *)skb->data;
-
- hci_dev_lock(hdev);
+ lockdep_assert_held(&hdev->lock);
/* If we are currently suspended and this is the first BT event seen,
* save the wake reason associated with the event.
*/
if (!hdev->suspended || hdev->wake_reason)
- goto unlock;
+ return;
+
+ if (!bdaddr) {
+ hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED;
+ return;
+ }
/* Default to remote wake. Values for wake_reason are documented in the
* Bluez mgmt api docs.
*/
hdev->wake_reason = MGMT_WAKE_REASON_REMOTE_WAKE;
-
- /* Once configured for remote wakeup, we should only wake up for
- * reconnections. It's useful to see which device is waking us up so
- * keep track of the bdaddr of the connection event that woke us up.
- */
- if (event == HCI_EV_CONN_REQUEST) {
- bacpy(&hdev->wake_addr, &conn_request->bdaddr);
- hdev->wake_addr_type = BDADDR_BREDR;
- } else if (event == HCI_EV_CONN_COMPLETE) {
- bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
- hdev->wake_addr_type = BDADDR_BREDR;
- } else if (event == HCI_EV_LE_META) {
- struct hci_ev_le_meta *le_ev = (void *)skb->data;
- u8 subevent = le_ev->subevent;
- u8 *ptr = &skb->data[sizeof(*le_ev)];
- u8 num_reports = *ptr;
-
- if ((subevent == HCI_EV_LE_ADVERTISING_REPORT ||
- subevent == HCI_EV_LE_DIRECT_ADV_REPORT ||
- subevent == HCI_EV_LE_EXT_ADV_REPORT) &&
- num_reports) {
- adv = (void *)(ptr + 1);
- direct_adv = (void *)(ptr + 1);
- ext_adv = (void *)(ptr + 1);
-
- switch (subevent) {
- case HCI_EV_LE_ADVERTISING_REPORT:
- bacpy(&hdev->wake_addr, &adv->bdaddr);
- hdev->wake_addr_type = adv->bdaddr_type;
- break;
- case HCI_EV_LE_DIRECT_ADV_REPORT:
- bacpy(&hdev->wake_addr, &direct_adv->bdaddr);
- hdev->wake_addr_type = direct_adv->bdaddr_type;
- break;
- case HCI_EV_LE_EXT_ADV_REPORT:
- bacpy(&hdev->wake_addr, &ext_adv->bdaddr);
- hdev->wake_addr_type = ext_adv->bdaddr_type;
- break;
- }
- }
- } else {
- hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED;
- }
-
-unlock:
- hci_dev_unlock(hdev);
+ bacpy(&hdev->wake_addr, bdaddr);
+ hdev->wake_addr_type = addr_type;
}
#define HCI_EV_VL(_op, _func, _min_len, _max_len) \
@@ -7830,14 +7812,15 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
skb_pull(skb, HCI_EVENT_HDR_SIZE);
- /* Store wake reason if we're suspended */
- hci_store_wake_reason(hdev, event, skb);
-
bt_dev_dbg(hdev, "event 0x%2.2x", event);
hci_event_func(hdev, event, skb, &opcode, &status, &req_complete,
&req_complete_skb);
+ hci_dev_lock(hdev);
+ hci_store_wake_reason(hdev, NULL, 0);
+ hci_dev_unlock(hdev);
+
if (req_complete) {
req_complete(hdev, status, opcode);
} else if (req_complete_skb) {
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 3166914b0d6c..919ec275dd23 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -25,11 +25,11 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
{
bt_dev_dbg(hdev, "result 0x%2.2x", result);
- if (hdev->req_status != HCI_REQ_PEND)
+ if (READ_ONCE(hdev->req_status) != HCI_REQ_PEND)
return;
hdev->req_result = result;
- hdev->req_status = HCI_REQ_DONE;
+ WRITE_ONCE(hdev->req_status, HCI_REQ_DONE);
/* Free the request command so it is not used as response */
kfree_skb(hdev->req_skb);
@@ -167,20 +167,20 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
hci_cmd_sync_add(&req, opcode, plen, param, event, sk);
- hdev->req_status = HCI_REQ_PEND;
+ WRITE_ONCE(hdev->req_status, HCI_REQ_PEND);
err = hci_req_sync_run(&req);
if (err < 0)
return ERR_PTR(err);
err = wait_event_interruptible_timeout(hdev->req_wait_q,
- hdev->req_status != HCI_REQ_PEND,
+ READ_ONCE(hdev->req_status) != HCI_REQ_PEND,
timeout);
if (err == -ERESTARTSYS)
return ERR_PTR(-EINTR);
- switch (hdev->req_status) {
+ switch (READ_ONCE(hdev->req_status)) {
case HCI_REQ_DONE:
err = -bt_to_errno(hdev->req_result);
break;
@@ -194,7 +194,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
break;
}
- hdev->req_status = 0;
+ WRITE_ONCE(hdev->req_status, 0);
hdev->req_result = 0;
skb = hdev->req_rsp;
hdev->req_rsp = NULL;
@@ -665,9 +665,9 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err)
{
bt_dev_dbg(hdev, "err 0x%2.2x", err);
- if (hdev->req_status == HCI_REQ_PEND) {
+ if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) {
hdev->req_result = err;
- hdev->req_status = HCI_REQ_CANCELED;
+ WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED);
queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work);
}
@@ -683,12 +683,12 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err)
{
bt_dev_dbg(hdev, "err 0x%2.2x", err);
- if (hdev->req_status == HCI_REQ_PEND) {
+ if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) {
/* req_result is __u32 so error must be positive to be properly
* propagated.
*/
hdev->req_result = err < 0 ? -err : err;
- hdev->req_status = HCI_REQ_CANCELED;
+ WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED);
wake_up_interruptible(&hdev->req_wait_q);
}
@@ -780,7 +780,7 @@ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
void *data, hci_cmd_sync_work_destroy_t destroy)
{
if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy))
- return 0;
+ return -EEXIST;
return hci_cmd_sync_queue(hdev, func, data, destroy);
}
@@ -801,8 +801,15 @@ int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
return -ENETDOWN;
/* If on cmd_sync_work then run immediately otherwise queue */
- if (current_work() == &hdev->cmd_sync_work)
- return func(hdev, data);
+ if (current_work() == &hdev->cmd_sync_work) {
+ int err;
+
+ err = func(hdev, data);
+ if (destroy)
+ destroy(hdev, data, err);
+
+ return 0;
+ }
return hci_cmd_sync_submit(hdev, func, data, destroy);
}
@@ -3255,6 +3262,8 @@ static int update_passive_scan_sync(struct hci_dev *hdev, void *data)
int hci_update_passive_scan(struct hci_dev *hdev)
{
+ int err;
+
/* Only queue if it would have any effect */
if (!test_bit(HCI_UP, &hdev->flags) ||
test_bit(HCI_INIT, &hdev->flags) ||
@@ -3264,8 +3273,9 @@ int hci_update_passive_scan(struct hci_dev *hdev)
hci_dev_test_flag(hdev, HCI_UNREGISTER))
return 0;
- return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL,
- NULL);
+ err = hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL,
+ NULL);
+ return (err == -EEXIST) ? 0 : err;
}
int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val)
@@ -6958,8 +6968,11 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data)
int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn)
{
- return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn,
- NULL);
+ int err;
+
+ err = hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn,
+ NULL);
+ return (err == -EEXIST) ? 0 : err;
}
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
@@ -6995,8 +7008,11 @@ done:
int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn)
{
- return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn,
- create_le_conn_complete);
+ int err;
+
+ err = hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn,
+ create_le_conn_complete);
+ return (err == -EEXIST) ? 0 : err;
}
int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn)
@@ -7203,8 +7219,11 @@ done:
int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn)
{
- return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn,
- create_pa_complete);
+ int err;
+
+ err = hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn,
+ create_pa_complete);
+ return (err == -EEXIST) ? 0 : err;
}
static void create_big_complete(struct hci_dev *hdev, void *data, int err)
@@ -7222,7 +7241,8 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err)
static int hci_le_big_create_sync(struct hci_dev *hdev, void *data)
{
- DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, 0x11);
+ DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis,
+ HCI_MAX_ISO_BIS);
struct hci_conn *conn = data;
struct bt_iso_qos *qos = &conn->iso_qos;
int err;
@@ -7266,8 +7286,11 @@ static int hci_le_big_create_sync(struct hci_dev *hdev, void *data)
int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn)
{
- return hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn,
- create_big_complete);
+ int err;
+
+ err = hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn,
+ create_big_complete);
+ return (err == -EEXIST) ? 0 : err;
}
struct past_data {
@@ -7359,7 +7382,7 @@ int hci_past_sync(struct hci_conn *conn, struct hci_conn *le)
if (err)
kfree(data);
- return err;
+ return (err == -EEXIST) ? 0 : err;
}
static void le_read_features_complete(struct hci_dev *hdev, void *data, int err)
@@ -7368,10 +7391,8 @@ static void le_read_features_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- if (err == -ECANCELED)
- return;
-
hci_conn_drop(conn);
+ hci_conn_put(conn);
}
static int hci_le_read_all_remote_features_sync(struct hci_dev *hdev,
@@ -7438,15 +7459,20 @@ int hci_le_read_remote_features(struct hci_conn *conn)
* role is possible. Otherwise just transition into the
* connected state without requesting the remote features.
*/
- if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES))
+ if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) {
err = hci_cmd_sync_queue_once(hdev,
hci_le_read_remote_features_sync,
- hci_conn_hold(conn),
+ hci_conn_hold(hci_conn_get(conn)),
le_read_features_complete);
- else
+ if (err) {
+ hci_conn_drop(conn);
+ hci_conn_put(conn);
+ }
+ } else {
err = -EOPNOTSUPP;
+ }
- return err;
+ return (err == -EEXIST) ? 0 : err;
}
static void pkt_type_changed(struct hci_dev *hdev, void *data, int err)
@@ -7472,6 +7498,7 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_change_conn_ptype *cp;
+ int err;
cp = kmalloc_obj(*cp);
if (!cp)
@@ -7480,8 +7507,12 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type)
cp->handle = cpu_to_le16(conn->handle);
cp->pkt_type = cpu_to_le16(pkt_type);
- return hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp,
- pkt_type_changed);
+ err = hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp,
+ pkt_type_changed);
+ if (err)
+ kfree(cp);
+
+ return (err == -EEXIST) ? 0 : err;
}
static void le_phy_update_complete(struct hci_dev *hdev, void *data, int err)
@@ -7507,6 +7538,7 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_le_set_phy *cp;
+ int err;
cp = kmalloc_obj(*cp);
if (!cp)
@@ -7517,6 +7549,10 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys)
cp->tx_phys = tx_phys;
cp->rx_phys = rx_phys;
- return hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp,
- le_phy_update_complete);
+ err = hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp,
+ le_phy_update_complete);
+ if (err)
+ kfree(cp);
+
+ return (err == -EEXIST) ? 0 : err;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 5deb6c4f1e41..95c65fece39b 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -926,16 +926,39 @@ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator)
static int l2cap_get_ident(struct l2cap_conn *conn)
{
+ u8 max;
+ int ident;
+
/* LE link does not support tools like l2ping so use the full range */
if (conn->hcon->type == LE_LINK)
- return ida_alloc_range(&conn->tx_ida, 1, 255, GFP_ATOMIC);
-
+ max = 255;
/* Get next available identificator.
* 1 - 128 are used by kernel.
* 129 - 199 are reserved.
* 200 - 254 are used by utilities like l2ping, etc.
*/
- return ida_alloc_range(&conn->tx_ida, 1, 128, GFP_ATOMIC);
+ else
+ max = 128;
+
+ /* Allocate ident using min as last used + 1 (cyclic) */
+ ident = ida_alloc_range(&conn->tx_ida, READ_ONCE(conn->tx_ident) + 1,
+ max, GFP_ATOMIC);
+ /* Force min 1 to start over */
+ if (ident <= 0) {
+ ident = ida_alloc_range(&conn->tx_ida, 1, max, GFP_ATOMIC);
+ if (ident <= 0) {
+ /* If all idents are in use, log an error, this is
+ * extremely unlikely to happen and would indicate a bug
+ * in the code that idents are not being freed properly.
+ */
+ BT_ERR("Unable to allocate ident: %d", ident);
+ return 0;
+ }
+ }
+
+ WRITE_ONCE(conn->tx_ident, ident);
+
+ return ident;
}
static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb,
@@ -1748,6 +1771,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
+ disable_delayed_work_sync(&conn->info_timer);
+ disable_delayed_work_sync(&conn->id_addr_timer);
+
mutex_lock(&conn->lock);
kfree_skb(conn->rx_skb);
@@ -1763,8 +1789,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
ida_destroy(&conn->tx_ida);
- cancel_delayed_work_sync(&conn->id_addr_timer);
-
l2cap_unregister_all_users(conn);
/* Force the connection to be immediately dropped */
@@ -1783,9 +1807,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
l2cap_chan_put(chan);
}
- if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
- cancel_delayed_work_sync(&conn->info_timer);
-
hci_chan_del(conn->hchan);
conn->hchan = NULL;
@@ -2377,6 +2398,9 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan,
/* Remote device may have requested smaller PDUs */
pdu_len = min_t(size_t, pdu_len, chan->remote_mps);
+ if (!pdu_len)
+ return -EINVAL;
+
if (len <= pdu_len) {
sar = L2CAP_SAR_UNSEGMENTED;
sdu_len = 0;
@@ -4312,14 +4336,16 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) {
set_default_fcs(chan);
- if (chan->mode == L2CAP_MODE_ERTM ||
- chan->mode == L2CAP_MODE_STREAMING)
- err = l2cap_ertm_init(chan);
+ if (chan->state != BT_CONNECTED) {
+ if (chan->mode == L2CAP_MODE_ERTM ||
+ chan->mode == L2CAP_MODE_STREAMING)
+ err = l2cap_ertm_init(chan);
- if (err < 0)
- l2cap_send_disconn_req(chan, -err);
- else
- l2cap_chan_ready(chan);
+ if (err < 0)
+ l2cap_send_disconn_req(chan, -err);
+ else
+ l2cap_chan_ready(chan);
+ }
goto unlock;
}
@@ -5081,14 +5107,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
cmd_len -= sizeof(*req);
num_scid = cmd_len / sizeof(u16);
- /* Always respond with the same number of scids as in the request */
- rsp_len = cmd_len;
-
if (num_scid > L2CAP_ECRED_MAX_CID) {
result = L2CAP_CR_LE_INVALID_PARAMS;
goto response;
}
+ /* Always respond with the same number of scids as in the request */
+ rsp_len = cmd_len;
+
mtu = __le16_to_cpu(req->mtu);
mps = __le16_to_cpu(req->mps);
@@ -6607,6 +6633,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
struct l2cap_le_credits pkt;
u16 return_credits = l2cap_le_rx_credits(chan);
+ if (chan->mode != L2CAP_MODE_LE_FLOWCTL &&
+ chan->mode != L2CAP_MODE_EXT_FLOWCTL)
+ return;
+
if (chan->rx_credits >= return_credits)
return;
@@ -6690,6 +6720,11 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
if (!chan->sdu) {
u16 sdu_len;
+ if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) {
+ err = -EINVAL;
+ goto failed;
+ }
+
sdu_len = get_unaligned_le16(skb->data);
skb_pull(skb, L2CAP_SDULEN_SIZE);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 597686790371..71e8c1b45bce 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1698,6 +1698,9 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan)
struct sock *sk = chan->data;
struct sock *parent;
+ if (!sk)
+ return;
+
lock_sock(sk);
parent = bt_sk(sk)->parent;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d52238ce6a9a..b05bb380e5f8 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2478,6 +2478,7 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
struct mgmt_mesh_tx *mesh_tx;
struct mgmt_cp_mesh_send *send = data;
struct mgmt_rp_mesh_read_features rp;
+ u16 expected_len;
bool sending;
int err = 0;
@@ -2485,12 +2486,19 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
!hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL))
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND,
MGMT_STATUS_NOT_SUPPORTED);
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) ||
- len <= MGMT_MESH_SEND_SIZE ||
- len > (MGMT_MESH_SEND_SIZE + 31))
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND,
MGMT_STATUS_REJECTED);
+ if (!send->adv_data_len || send->adv_data_len > 31)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND,
+ MGMT_STATUS_REJECTED);
+
+ expected_len = struct_size(send, adv_data, send->adv_data_len);
+ if (expected_len != len)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND,
+ MGMT_STATUS_INVALID_PARAMS);
+
hci_dev_lock(hdev);
memset(&rp, 0, sizeof(rp));
@@ -5355,7 +5363,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev,
* hci_adv_monitors_clear is about to be called which will take care of
* freeing the adv_monitor instances.
*/
- if (status == -ECANCELED && !mgmt_pending_valid(hdev, cmd))
+ if (status == -ECANCELED || !mgmt_pending_valid(hdev, cmd))
return;
monitor = cmd->user_data;
@@ -7248,6 +7256,9 @@ static bool ltk_is_valid(struct mgmt_ltk_info *key)
if (key->initiator != 0x00 && key->initiator != 0x01)
return false;
+ if (key->enc_size > sizeof(key->val))
+ return false;
+
switch (key->addr.type) {
case BDADDR_LE_PUBLIC:
return true;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index e7db50165879..b84587811ef4 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -298,7 +298,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
int err = 0;
sco_conn_lock(conn);
- if (conn->sk)
+ if (conn->sk || sco_pi(sk)->conn)
err = -EBUSY;
else
__sco_chan_add(conn, sk, parent);
@@ -353,9 +353,20 @@ static int sco_connect(struct sock *sk)
lock_sock(sk);
+ /* Recheck state after reacquiring the socket lock, as another
+ * thread may have changed it (e.g., closed the socket).
+ */
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ release_sock(sk);
+ hci_conn_drop(hcon);
+ err = -EBADFD;
+ goto unlock;
+ }
+
err = sco_chan_add(conn, sk, NULL);
if (err) {
release_sock(sk);
+ hci_conn_drop(hcon);
goto unlock;
}
@@ -401,7 +412,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
struct sock *sk;
sco_conn_lock(conn);
- sk = conn->sk;
+ sk = sco_sock_hold(conn);
sco_conn_unlock(conn);
if (!sk)
@@ -410,11 +421,15 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
BT_DBG("sk %p len %u", sk, skb->len);
if (sk->sk_state != BT_CONNECTED)
- goto drop;
+ goto drop_put;
- if (!sock_queue_rcv_skb(sk, skb))
+ if (!sock_queue_rcv_skb(sk, skb)) {
+ sock_put(sk);
return;
+ }
+drop_put:
+ sock_put(sk);
drop:
kfree_skb(skb);
}
@@ -652,13 +667,18 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr_unsized *addr,
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
+ lock_sock(sk);
+
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ release_sock(sk);
return -EBADFD;
+ }
- if (sk->sk_type != SOCK_SEQPACKET)
- err = -EINVAL;
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ release_sock(sk);
+ return -EINVAL;
+ }
- lock_sock(sk);
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
release_sock(sk);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 485e3468bd26..98f1da4f5f55 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1018,10 +1018,7 @@ static u8 smp_random(struct smp_chan *smp)
smp_s1(smp->tk, smp->prnd, smp->rrnd, stk);
- if (hcon->pending_sec_level == BT_SECURITY_HIGH)
- auth = 1;
- else
- auth = 0;
+ auth = test_bit(SMP_FLAG_MITM_AUTH, &smp->flags) ? 1 : 0;
/* Even though there's no _RESPONDER suffix this is the
* responder STK we're adding for later lookup (the initiator
@@ -1826,7 +1823,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
if (sec_level > conn->hcon->pending_sec_level)
conn->hcon->pending_sec_level = sec_level;
- /* If we need MITM check that it can be achieved */
+ /* If we need MITM check that it can be achieved. */
if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) {
u8 method;
@@ -1834,6 +1831,10 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
req->io_capability);
if (method == JUST_WORKS || method == JUST_CFM)
return SMP_AUTH_REQUIREMENTS;
+
+ /* Force MITM bit if it isn't set by the initiator. */
+ auth |= SMP_AUTH_MITM;
+ rsp.auth_req |= SMP_AUTH_MITM;
}
key_size = min(req->max_key_size, rsp.max_key_size);
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index 1e2b51769eec..6b5595868a39 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -251,12 +251,12 @@ struct nd_msg *br_is_nd_neigh_msg(const struct sk_buff *skb, struct nd_msg *msg)
static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
struct sk_buff *request, struct neighbour *n,
- __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns)
+ __be16 vlan_proto, u16 vlan_tci)
{
struct net_device *dev = request->dev;
struct net_bridge_vlan_group *vg;
+ struct nd_msg *na, *ns;
struct sk_buff *reply;
- struct nd_msg *na;
struct ipv6hdr *pip6;
int na_olen = 8; /* opt hdr + ETH_ALEN for target */
int ns_olen;
@@ -264,7 +264,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
u8 *daddr;
u16 pvid;
- if (!dev)
+ if (!dev || skb_linearize(request))
return;
len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
@@ -281,17 +281,21 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
skb_set_mac_header(reply, 0);
daddr = eth_hdr(request)->h_source;
+ ns = (struct nd_msg *)(skb_network_header(request) +
+ sizeof(struct ipv6hdr));
/* Do we need option processing ? */
ns_olen = request->len - (skb_network_offset(request) +
sizeof(struct ipv6hdr)) - sizeof(*ns);
for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) {
- if (!ns->opt[i + 1]) {
+ if (!ns->opt[i + 1] || i + (ns->opt[i + 1] << 3) > ns_olen) {
kfree_skb(reply);
return;
}
if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
- daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
+ if ((ns->opt[i + 1] << 3) >=
+ sizeof(struct nd_opt_hdr) + ETH_ALEN)
+ daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
break;
}
}
@@ -472,9 +476,9 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
if (vid != 0)
br_nd_send(br, p, skb, n,
skb->vlan_proto,
- skb_vlan_tag_get(skb), msg);
+ skb_vlan_tag_get(skb));
else
- br_nd_send(br, p, skb, n, 0, 0, msg);
+ br_nd_send(br, p, skb, n, 0, 0);
replied = true;
}
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index ce6f63c77cc0..86f0e75d6e34 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -196,7 +196,7 @@ static const struct nla_policy
br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = {
[IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT },
[IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 },
- [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1),
[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 },
[IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 },
[IFLA_BRIDGE_MRP_START_TEST_MONITOR] = { .type = NLA_U32 },
@@ -316,7 +316,7 @@ static const struct nla_policy
br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = {
[IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT },
[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 },
- [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1),
[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 },
[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 },
};
diff --git a/net/can/af_can.c b/net/can/af_can.c
index f70e2ba0aadc..7bc86b176b4d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -469,7 +469,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
rcv->can_id = can_id;
rcv->mask = mask;
- rcv->matches = 0;
+ atomic_long_set(&rcv->matches, 0);
rcv->func = func;
rcv->data = data;
rcv->ident = ident;
@@ -573,7 +573,7 @@ EXPORT_SYMBOL(can_rx_unregister);
static inline void deliver(struct sk_buff *skb, struct receiver *rcv)
{
rcv->func(skb, rcv->data);
- rcv->matches++;
+ atomic_long_inc(&rcv->matches);
}
static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb)
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 22f3352c77fe..87887014f562 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -52,7 +52,7 @@ struct receiver {
struct hlist_node list;
canid_t can_id;
canid_t mask;
- unsigned long matches;
+ atomic_long_t matches;
void (*func)(struct sk_buff *skb, void *data);
void *data;
char *ident;
diff --git a/net/can/gw.c b/net/can/gw.c
index 8ee4d67a07d3..0ec99f68aa45 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -375,10 +375,10 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf,
return;
if (from <= to) {
- for (i = crc8->from_idx; i <= crc8->to_idx; i++)
+ for (i = from; i <= to; i++)
crc = crc8->crctab[crc ^ cf->data[i]];
} else {
- for (i = crc8->from_idx; i >= crc8->to_idx; i--)
+ for (i = from; i >= to; i--)
crc = crc8->crctab[crc ^ cf->data[i]];
}
@@ -397,7 +397,7 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf,
break;
}
- cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
+ cf->data[res] = crc ^ crc8->final_xor_val;
}
static void cgw_csum_crc8_pos(struct canfd_frame *cf,
diff --git a/net/can/isotp.c b/net/can/isotp.c
index da3b72e7afcc..2770f43f4951 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1248,12 +1248,6 @@ static int isotp_release(struct socket *sock)
so->ifindex = 0;
so->bound = 0;
- if (so->rx.buf != so->rx.sbuf)
- kfree(so->rx.buf);
-
- if (so->tx.buf != so->tx.sbuf)
- kfree(so->tx.buf);
-
sock_orphan(sk);
sock->sk = NULL;
@@ -1622,6 +1616,21 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
return NOTIFY_DONE;
}
+static void isotp_sock_destruct(struct sock *sk)
+{
+ struct isotp_sock *so = isotp_sk(sk);
+
+ /* do the standard CAN sock destruct work */
+ can_sock_destruct(sk);
+
+ /* free potential extended PDU buffers */
+ if (so->rx.buf != so->rx.sbuf)
+ kfree(so->rx.buf);
+
+ if (so->tx.buf != so->tx.sbuf)
+ kfree(so->tx.buf);
+}
+
static int isotp_init(struct sock *sk)
{
struct isotp_sock *so = isotp_sk(sk);
@@ -1666,6 +1675,9 @@ static int isotp_init(struct sock *sk)
list_add_tail(&so->notifier, &isotp_notifier_list);
spin_unlock(&isotp_notifier_lock);
+ /* re-assign default can_sock_destruct() reference */
+ sk->sk_destruct = isotp_sock_destruct;
+
return 0;
}
diff --git a/net/can/proc.c b/net/can/proc.c
index 0938bf7dd646..de4d05ae3459 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -196,7 +196,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list,
" %-5s %03x %08x %pK %pK %8ld %s\n";
seq_printf(m, fmt, DNAME(dev), r->can_id, r->mask,
- r->func, r->data, r->matches, r->ident);
+ r->func, r->data, atomic_long_read(&r->matches),
+ r->ident);
}
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 14a83f2035b9..831129f2a69b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3769,6 +3769,22 @@ static netdev_features_t dflt_features_check(struct sk_buff *skb,
return vlan_features_check(skb, features);
}
+static bool skb_gso_has_extension_hdr(const struct sk_buff *skb)
+{
+ if (!skb->encapsulation)
+ return ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 ||
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+ vlan_get_protocol(skb) == htons(ETH_P_IPV6))) &&
+ skb_transport_header_was_set(skb) &&
+ skb_network_header_len(skb) != sizeof(struct ipv6hdr));
+ else
+ return (!skb_inner_network_header_was_set(skb) ||
+ ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 ||
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+ inner_ip_hdr(skb)->version == 6)) &&
+ skb_inner_network_header_len(skb) != sizeof(struct ipv6hdr)));
+}
+
static netdev_features_t gso_features_check(const struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features)
@@ -3805,10 +3821,15 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
* segmentation-offloads.rst).
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
- struct iphdr *iph = skb->encapsulation ?
- inner_ip_hdr(skb) : ip_hdr(skb);
+ const struct iphdr *iph;
+ struct iphdr _iph;
+ int nhoff = skb->encapsulation ?
+ skb_inner_network_offset(skb) :
+ skb_network_offset(skb);
+
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
- if (!(iph->frag_off & htons(IP_DF)))
+ if (!iph || !(iph->frag_off & htons(IP_DF)))
features &= ~dev->mangleid_features;
}
@@ -3816,11 +3837,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
* so neither does TSO that depends on it.
*/
if (features & NETIF_F_IPV6_CSUM &&
- (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 ||
- (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
- vlan_get_protocol(skb) == htons(ETH_P_IPV6))) &&
- skb_transport_header_was_set(skb) &&
- skb_network_header_len(skb) != sizeof(struct ipv6hdr))
+ skb_gso_has_extension_hdr(skb))
features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4);
return features;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dad4b1054955..fae8034efbff 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -629,6 +629,9 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
unlock:
mutex_unlock(&link_ops_mutex);
+ if (err)
+ cleanup_srcu_struct(&ops->srcu);
+
return err;
}
EXPORT_SYMBOL_GPL(rtnl_link_register);
@@ -707,11 +710,14 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
goto out;
ops = master_dev->rtnl_link_ops;
- if (!ops || !ops->get_slave_size)
+ if (!ops)
+ goto out;
+ size += nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_SLAVE_KIND */
+ if (!ops->get_slave_size)
goto out;
/* IFLA_INFO_SLAVE_DATA + nested data */
- size = nla_total_size(sizeof(struct nlattr)) +
- ops->get_slave_size(master_dev, dev);
+ size += nla_total_size(sizeof(struct nlattr)) +
+ ops->get_slave_size(master_dev, dev);
out:
rcu_read_unlock();
@@ -1267,6 +1273,21 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev)
return size;
}
+static size_t rtnl_dev_parent_size(const struct net_device *dev)
+{
+ size_t size = 0;
+
+ /* IFLA_PARENT_DEV_NAME */
+ if (dev->dev.parent)
+ size += nla_total_size(strlen(dev_name(dev->dev.parent)) + 1);
+
+ /* IFLA_PARENT_DEV_BUS_NAME */
+ if (dev->dev.parent && dev->dev.parent->bus)
+ size += nla_total_size(strlen(dev->dev.parent->bus->name) + 1);
+
+ return size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -1328,6 +1349,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */
+ nla_total_size(2) /* IFLA_HEADROOM */
+ nla_total_size(2) /* IFLA_TAILROOM */
+ + rtnl_dev_parent_size(dev)
+ 0;
if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS))
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 3261793abe83..6187a83bd741 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1267,17 +1267,20 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
- struct socket *sock = sk->sk_socket;
- const struct proto_ops *ops;
+ const struct proto_ops *ops = NULL;
+ struct socket *sock;
int copied;
trace_sk_data_ready(sk);
- if (unlikely(!sock))
- return;
- ops = READ_ONCE(sock->ops);
+ rcu_read_lock();
+ sock = READ_ONCE(sk->sk_socket);
+ if (likely(sock))
+ ops = READ_ONCE(sock->ops);
+ rcu_read_unlock();
if (!ops || !ops->read_skb)
return;
+
copied = ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
struct sk_psock *psock;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index d1bfc49b5f01..fd2fea25eff0 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -532,8 +532,8 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
__be16 proto, u16 vid)
{
- bool is_slave_a_added = false;
- bool is_slave_b_added = false;
+ struct net_device *slave_a_dev = NULL;
+ struct net_device *slave_b_dev = NULL;
struct hsr_port *port;
struct hsr_priv *hsr;
int ret = 0;
@@ -549,33 +549,35 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
switch (port->type) {
case HSR_PT_SLAVE_A:
if (ret) {
- /* clean up Slave-B */
netdev_err(dev, "add vid failed for Slave-A\n");
- if (is_slave_b_added)
- vlan_vid_del(port->dev, proto, vid);
- return ret;
+ goto unwind;
}
-
- is_slave_a_added = true;
+ slave_a_dev = port->dev;
break;
-
case HSR_PT_SLAVE_B:
if (ret) {
- /* clean up Slave-A */
netdev_err(dev, "add vid failed for Slave-B\n");
- if (is_slave_a_added)
- vlan_vid_del(port->dev, proto, vid);
- return ret;
+ goto unwind;
}
-
- is_slave_b_added = true;
+ slave_b_dev = port->dev;
break;
default:
+ if (ret)
+ goto unwind;
break;
}
}
return 0;
+
+unwind:
+ if (slave_a_dev)
+ vlan_vid_del(slave_a_dev, proto, vid);
+
+ if (slave_b_dev)
+ vlan_vid_del(slave_b_dev, proto, vid);
+
+ return ret;
}
static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 50996f4de7f9..d41863593674 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -123,6 +123,40 @@ static void hsr_free_node_rcu(struct rcu_head *rn)
hsr_free_node(node);
}
+static void hsr_lock_seq_out_pair(struct hsr_node *node_a,
+ struct hsr_node *node_b)
+{
+ if (node_a == node_b) {
+ spin_lock_bh(&node_a->seq_out_lock);
+ return;
+ }
+
+ if (node_a < node_b) {
+ spin_lock_bh(&node_a->seq_out_lock);
+ spin_lock_nested(&node_b->seq_out_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_bh(&node_b->seq_out_lock);
+ spin_lock_nested(&node_a->seq_out_lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+static void hsr_unlock_seq_out_pair(struct hsr_node *node_a,
+ struct hsr_node *node_b)
+{
+ if (node_a == node_b) {
+ spin_unlock_bh(&node_a->seq_out_lock);
+ return;
+ }
+
+ if (node_a < node_b) {
+ spin_unlock(&node_b->seq_out_lock);
+ spin_unlock_bh(&node_a->seq_out_lock);
+ } else {
+ spin_unlock(&node_a->seq_out_lock);
+ spin_unlock_bh(&node_b->seq_out_lock);
+ }
+}
+
void hsr_del_nodes(struct list_head *node_db)
{
struct hsr_node *node;
@@ -432,7 +466,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
}
ether_addr_copy(node_real->macaddress_B, ethhdr->h_source);
- spin_lock_bh(&node_real->seq_out_lock);
+ hsr_lock_seq_out_pair(node_real, node_curr);
for (i = 0; i < HSR_PT_PORTS; i++) {
if (!node_curr->time_in_stale[i] &&
time_after(node_curr->time_in[i], node_real->time_in[i])) {
@@ -455,7 +489,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
src_blk->seq_nrs[i], HSR_SEQ_BLOCK_SIZE);
}
}
- spin_unlock_bh(&node_real->seq_out_lock);
+ hsr_unlock_seq_out_pair(node_real, node_curr);
node_real->addr_B_port = port_rcv->type;
spin_lock_bh(&hsr->list_lock);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 2c922afadb8f..6dfc0bcdef65 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -235,10 +235,13 @@ static void esp_output_done(void *data, int err)
xfrm_dev_resume(skb);
} else {
if (!err &&
- x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
- esp_output_tail_tcp(x, skb);
- else
+ x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) {
+ err = esp_output_tail_tcp(x, skb);
+ if (err != -EINPROGRESS)
+ kfree_skb(skb);
+ } else {
xfrm_output_resume(skb_to_full_sk(skb), skb, err);
+ }
}
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5dfac6ce1110..e961936b6be7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -154,20 +154,6 @@ bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
}
EXPORT_SYMBOL(inet_sk_get_local_port_range);
-static bool inet_use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6) {
- if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
- return false;
-
- if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
- return true;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
kuid_t uid, bool relax,
bool reuseport_cb_ok, bool reuseport_ok)
@@ -259,7 +245,7 @@ static int inet_csk_bind_conflict(const struct sock *sk,
* checks separately because their spinlocks have to be acquired/released
* independently of each other, to prevent possible deadlocks
*/
- if (inet_use_bhash2_on_bind(sk))
+ if (inet_use_hash2_on_bind(sk))
return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax,
reuseport_cb_ok, reuseport_ok);
@@ -376,7 +362,7 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- if (inet_use_bhash2_on_bind(sk)) {
+ if (inet_use_hash2_on_bind(sk)) {
if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false))
goto next_port;
}
@@ -562,7 +548,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
check_bind_conflict = false;
}
- if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) {
+ if (check_bind_conflict && inet_use_hash2_on_bind(sk)) {
if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true))
goto fail_unlock;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b60fad393e18..cb99a3c27053 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -287,7 +287,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
} else {
hslot = udp_hashslot(udptable, net, snum);
spin_lock_bh(&hslot->lock);
- if (hslot->count > 10) {
+ if (inet_use_hash2_on_bind(sk) && hslot->count > 10) {
int exist;
unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 04c1e856bf7f..22c5cdffeae7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2863,7 +2863,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
fib6_add_gc_list(rt);
} else {
fib6_clean_expires(rt);
- fib6_remove_gc_list(rt);
+ fib6_may_remove_gc_list(net, rt);
}
spin_unlock_bh(&table->tb6_lock);
@@ -3626,12 +3626,12 @@ static void addrconf_permanent_addr(struct net *net, struct net_device *dev)
if ((ifp->flags & IFA_F_PERMANENT) &&
fixup_permanent_addr(net, idev, ifp) < 0) {
write_unlock_bh(&idev->lock);
- in6_ifa_hold(ifp);
- ipv6_del_addr(ifp);
- write_lock_bh(&idev->lock);
net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n",
idev->dev->name, &ifp->addr);
+ in6_ifa_hold(ifp);
+ ipv6_del_addr(ifp);
+ write_lock_bh(&idev->lock);
}
}
@@ -4841,7 +4841,7 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp,
if (!(flags & RTF_EXPIRES)) {
fib6_clean_expires(f6i);
- fib6_remove_gc_list(f6i);
+ fib6_may_remove_gc_list(net, f6i);
} else {
fib6_set_expires(f6i, expires);
fib6_add_gc_list(f6i);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c564b68a0562..993e2d76fc1f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -763,6 +763,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
{
struct in6_pktinfo *src_info;
struct cmsghdr *cmsg;
+ struct ipv6_rt_hdr *orthdr;
struct ipv6_rt_hdr *rthdr;
struct ipv6_opt_hdr *hdr;
struct ipv6_txoptions *opt = ipc6->opt;
@@ -924,9 +925,13 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
}
if (cmsg->cmsg_type == IPV6_DSTOPTS) {
+ if (opt->dst1opt)
+ opt->opt_flen -= ipv6_optlen(opt->dst1opt);
opt->opt_flen += len;
opt->dst1opt = hdr;
} else {
+ if (opt->dst0opt)
+ opt->opt_nflen -= ipv6_optlen(opt->dst0opt);
opt->opt_nflen += len;
opt->dst0opt = hdr;
}
@@ -969,12 +974,17 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
}
+ orthdr = opt->srcrt;
+ if (orthdr)
+ opt->opt_nflen -= ((orthdr->hdrlen + 1) << 3);
opt->opt_nflen += len;
opt->srcrt = rthdr;
if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) {
int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3);
+ if (opt->dst0opt)
+ opt->opt_nflen -= ipv6_optlen(opt->dst0opt);
opt->opt_nflen += dsthdrlen;
opt->dst0opt = opt->dst1opt;
opt->dst1opt = NULL;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index e75da98f5283..9f75313734f8 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -271,10 +271,13 @@ static void esp_output_done(void *data, int err)
xfrm_dev_resume(skb);
} else {
if (!err &&
- x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
- esp_output_tail_tcp(x, skb);
- else
+ x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) {
+ err = esp_output_tail_tcp(x, skb);
+ if (err != -EINPROGRESS)
+ kfree_skb(skb);
+ } else {
xfrm_output_resume(skb_to_full_sk(skb), skb, err);
+ }
}
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 813d2e9edb8b..d5d23a9296ea 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -875,6 +875,9 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
if (!skb2)
return 1;
+ /* Remove debris left by IPv4 stack. */
+ memset(IP6CB(skb2), 0, sizeof(*IP6CB(skb2)));
+
skb_dst_drop(skb2);
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index b76f89d92e7b..3978773bec42 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -708,7 +708,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_namespace *ns,
struct ioam6_trace_hdr *trace,
struct ioam6_schema *sc,
- u8 sclen, bool is_input)
+ unsigned int sclen, bool is_input)
{
struct net_device *dev = skb_dst_dev(skb);
struct timespec64 ts;
@@ -939,7 +939,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb,
bool is_input)
{
struct ioam6_schema *sc;
- u8 sclen = 0;
+ unsigned int sclen = 0;
/* Skip if Overflow flag is set
*/
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 9058e71241dc..45ef4d65dcbc 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -727,20 +727,28 @@ unlock:
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
{
+ struct dst_metrics *m;
+
if (!f6i)
return;
- if (f6i->fib6_metrics == &dst_default_metrics) {
+ if (READ_ONCE(f6i->fib6_metrics) == &dst_default_metrics) {
+ struct dst_metrics *dflt = (struct dst_metrics *)&dst_default_metrics;
struct dst_metrics *p = kzalloc_obj(*p, GFP_ATOMIC);
if (!p)
return;
+ p->metrics[metric - 1] = val;
refcount_set(&p->refcnt, 1);
- f6i->fib6_metrics = p;
+ if (cmpxchg(&f6i->fib6_metrics, dflt, p) != dflt)
+ kfree(p);
+ else
+ return;
}
- f6i->fib6_metrics->metrics[metric - 1] = val;
+ m = READ_ONCE(f6i->fib6_metrics);
+ WRITE_ONCE(m->metrics[metric - 1], val);
}
/*
@@ -1133,7 +1141,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES)) {
fib6_clean_expires(iter);
- fib6_remove_gc_list(iter);
+ fib6_may_remove_gc_list(info->nl_net, iter);
} else {
fib6_set_expires(iter, rt->expires);
fib6_add_gc_list(iter);
@@ -2348,6 +2356,17 @@ static void fib6_flush_trees(struct net *net)
/*
* Garbage collection
*/
+void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires;
+ int old_more = gc_args->more;
+
+ rt6_age_exceptions(rt, gc_args, now);
+
+ if (!may_expire && old_more == gc_args->more)
+ fib6_remove_gc_list(rt);
+}
static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
@@ -2370,7 +2389,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
* Note, that clones are aged out
* only if they are not in use now.
*/
- rt6_age_exceptions(rt, gc_args, now);
+ fib6_age_exceptions(rt, gc_args, now);
return 0;
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 7c12bf75beed..c92f98c6f6ec 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -133,11 +133,6 @@ static void fl_release(struct ip6_flowlabel *fl)
if (time_after(ttd, fl->expires))
fl->expires = ttd;
ttd = fl->expires;
- if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
- struct ipv6_txoptions *opt = fl->opt;
- fl->opt = NULL;
- kfree(opt);
- }
if (!timer_pending(&ip6_fl_gc_timer) ||
time_after(ip6_fl_gc_timer.expires, ttd))
mod_timer(&ip6_fl_gc_timer, ttd);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4c29aa94e86e..0b53488a9229 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -601,11 +601,16 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!skb2)
return 0;
+ /* Remove debris left by IPv6 stack. */
+ memset(IPCB(skb2), 0, sizeof(*IPCB(skb2)));
+
skb_dst_drop(skb2);
skb_pull(skb2, offset);
skb_reset_network_header(skb2);
eiph = ip_hdr(skb2);
+ if (eiph->version != 4 || eiph->ihl < 5)
+ goto out;
/* Try to guess incoming interface */
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f6a5d8c73af9..186e60c79214 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1209,6 +1209,9 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
+ ndmsg->nduseropt_pad1 = 0;
+ ndmsg->nduseropt_pad2 = 0;
+ ndmsg->nduseropt_pad3 = 0;
memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 4ad8b2032f1f..5561bd9cea81 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par)
pr_debug("unknown flags %X\n", rtinfo->invflags);
return -EINVAL;
}
+ if (rtinfo->addrnr > IP6T_RT_HOPS) {
+ pr_debug("too many addresses specified\n");
+ return -EINVAL;
+ }
if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
(!(rtinfo->flags & IP6T_RT_TYP) ||
(rtinfo->rt_type != 0) ||
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 08cd86f49bf9..cb521700cee7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1033,7 +1033,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
if (!addrconf_finite_timeout(lifetime)) {
fib6_clean_expires(rt);
- fib6_remove_gc_list(rt);
+ fib6_may_remove_gc_list(net, rt);
} else {
fib6_set_expires(rt, jiffies + HZ * lifetime);
fib6_add_gc_list(rt);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 0756bac62f7c..72ac2ace419d 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3518,7 +3518,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *
static int set_ipsecrequest(struct sk_buff *skb,
uint8_t proto, uint8_t mode, int level,
- uint32_t reqid, uint8_t family,
+ uint32_t reqid, sa_family_t family,
const xfrm_address_t *src, const xfrm_address_t *dst)
{
struct sadb_x_ipsecrequest *rq;
@@ -3583,12 +3583,17 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
/* ipsecrequests */
for (i = 0, mp = m; i < num_bundles; i++, mp++) {
- /* old locator pair */
- size_pol += sizeof(struct sadb_x_ipsecrequest) +
- pfkey_sockaddr_pair_size(mp->old_family);
- /* new locator pair */
- size_pol += sizeof(struct sadb_x_ipsecrequest) +
- pfkey_sockaddr_pair_size(mp->new_family);
+ int pair_size;
+
+ pair_size = pfkey_sockaddr_pair_size(mp->old_family);
+ if (!pair_size)
+ return -EINVAL;
+ size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size;
+
+ pair_size = pfkey_sockaddr_pair_size(mp->new_family);
+ if (!pair_size)
+ return -EINVAL;
+ size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size;
}
size += sizeof(struct sadb_msg) + size_pol;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d5417688f69e..18d3da8ab384 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -83,14 +83,30 @@ static struct mpls_route *mpls_route_input(struct net *net, unsigned int index)
return mpls_dereference(net, platform_label[index]);
}
+static struct mpls_route __rcu **mpls_platform_label_rcu(struct net *net, size_t *platform_labels)
+{
+ struct mpls_route __rcu **platform_label;
+ unsigned int sequence;
+
+ do {
+ sequence = read_seqcount_begin(&net->mpls.platform_label_seq);
+ platform_label = rcu_dereference(net->mpls.platform_label);
+ *platform_labels = net->mpls.platform_labels;
+ } while (read_seqcount_retry(&net->mpls.platform_label_seq, sequence));
+
+ return platform_label;
+}
+
static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index)
{
struct mpls_route __rcu **platform_label;
+ size_t platform_labels;
+
+ platform_label = mpls_platform_label_rcu(net, &platform_labels);
- if (index >= net->mpls.platform_labels)
+ if (index >= platform_labels)
return NULL;
- platform_label = rcu_dereference(net->mpls.platform_label);
return rcu_dereference(platform_label[index]);
}
@@ -2240,8 +2256,7 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
if (index < MPLS_LABEL_FIRST_UNRESERVED)
index = MPLS_LABEL_FIRST_UNRESERVED;
- platform_label = rcu_dereference(net->mpls.platform_label);
- platform_labels = net->mpls.platform_labels;
+ platform_label = mpls_platform_label_rcu(net, &platform_labels);
if (filter.filter_set)
flags |= NLM_F_DUMP_FILTERED;
@@ -2645,8 +2660,12 @@ static int resize_platform_label_table(struct net *net, size_t limit)
}
/* Update the global pointers */
+ local_bh_disable();
+ write_seqcount_begin(&net->mpls.platform_label_seq);
net->mpls.platform_labels = limit;
rcu_assign_pointer(net->mpls.platform_label, labels);
+ write_seqcount_end(&net->mpls.platform_label_seq);
+ local_bh_enable();
mutex_unlock(&net->mpls.platform_mutex);
@@ -2728,6 +2747,8 @@ static __net_init int mpls_net_init(struct net *net)
int i;
mutex_init(&net->mpls.platform_mutex);
+ seqcount_mutex_init(&net->mpls.platform_label_seq, &net->mpls.platform_mutex);
+
net->mpls.platform_labels = 0;
net->mpls.platform_label = NULL;
net->mpls.ip_ttl_propagate = 1;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cf1852b99963..65c3bb8016f4 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2006,7 +2006,7 @@ static void mptcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int copied_total,
struct scm_timestamping_internal *tss,
- int *cmsg_flags)
+ int *cmsg_flags, struct sk_buff **last)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct sk_buff *skb, *tmp;
@@ -2023,6 +2023,7 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
/* skip already peeked skbs */
if (total_data_len + data_len <= copied_total) {
total_data_len += data_len;
+ *last = skb;
continue;
}
@@ -2058,6 +2059,8 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
}
mptcp_eat_recv_skb(sk, skb);
+ } else {
+ *last = skb;
}
if (copied >= len)
@@ -2288,10 +2291,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
cmsg_flags = MPTCP_CMSG_INQ;
while (copied < len) {
+ struct sk_buff *last = NULL;
int err, bytes_read;
bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
- copied, &tss, &cmsg_flags);
+ copied, &tss, &cmsg_flags,
+ &last);
if (unlikely(bytes_read < 0)) {
if (!copied)
copied = bytes_read;
@@ -2343,7 +2348,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
pr_debug("block timeout %ld\n", timeo);
mptcp_cleanup_rbuf(msk, copied);
- err = sk_wait_data(sk, &timeo, NULL);
+ err = sk_wait_data(sk, &timeo, last);
if (err < 0) {
err = copied ? : err;
goto out_err;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index a2fe711cb5e3..d0c9fe59c67d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -821,7 +821,7 @@ EXPORT_SYMBOL_GPL(ip_set_del);
*
*/
ip_set_id_t
-ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
+ip_set_get_byname(struct net *net, const struct nlattr *name, struct ip_set **set)
{
ip_set_id_t i, index = IPSET_INVALID_ID;
struct ip_set *s;
@@ -830,7 +830,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
rcu_read_lock();
for (i = 0; i < inst->ip_set_max; i++) {
s = rcu_dereference(inst->ip_set_list)[i];
- if (s && STRNCMP(s->name, name)) {
+ if (s && nla_strcmp(name, s->name) == 0) {
__ip_set_get(s);
index = i;
*set = s;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 181daa9c2019..b79e5dd2af03 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1098,7 +1098,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (!test_bit(i, n->used))
k++;
}
- if (n->pos == 0 && k == 0) {
+ if (k == n->pos) {
t->hregion[r].ext_size -= ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, key), NULL);
kfree_rcu(n, rcu);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 98b91b34c314..1cef84f15e8c 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -367,7 +367,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s);
+ e.id = ip_set_get_byname(map->net, tb[IPSET_ATTR_NAME], &s);
if (e.id == IPSET_INVALID_ID)
return -IPSET_ERR_NAME;
/* "Loop detection" */
@@ -389,7 +389,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_NAMEREF]) {
e.refid = ip_set_get_byname(map->net,
- nla_data(tb[IPSET_ATTR_NAMEREF]),
+ tb[IPSET_ATTR_NAMEREF],
&s);
if (e.refid == IPSET_INVALID_ID) {
ret = -IPSET_ERR_NAMEREF;
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index a7552a46d6ac..4f39bf7c843f 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
unsigned int timeout)
{
const struct nf_conntrack_helper *helper;
+ struct net *net = read_pnet(&ct->ct_net);
struct nf_conntrack_expect *exp;
struct iphdr *iph = ip_hdr(skb);
struct rtable *rt = skb_rtable(skb);
@@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
exp->expectfn = NULL;
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
- exp->helper = NULL;
-
+ rcu_assign_pointer(exp->helper, helper);
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 81baf2082604..9df159448b89 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
+ lockdep_nfct_expect_lock_held();
+
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index cfc2daa3fc7f..24d0576d84b7 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct net *net = nf_ct_exp_net(exp);
struct nf_conntrack_net *cnet;
+ lockdep_nfct_expect_lock_held();
WARN_ON(!master_help);
WARN_ON(timer_pending(&exp->timeout));
@@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
const struct net *net)
{
return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- net_eq(net, nf_ct_net(i->master)) &&
- nf_ct_zone_equal_any(i->master, zone);
+ net_eq(net, read_pnet(&i->net)) &&
+ nf_ct_exp_zone_equal_any(i, zone);
}
bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
{
+ lockdep_nfct_expect_lock_held();
+
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
@@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net,
struct nf_conntrack_expect *i, *exp = NULL;
unsigned int h;
+ lockdep_nfct_expect_lock_held();
+
if (!cnet->expect_count)
return NULL;
@@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
+/* This function can only be used from packet path, where accessing
+ * master's helper is safe, because the packet holds a reference on
+ * the conntrack object. Never use it from control plane.
+ */
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
u_int8_t family,
const union nf_inet_addr *saddr,
const union nf_inet_addr *daddr,
u_int8_t proto, const __be16 *src, const __be16 *dst)
{
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conn *ct = exp->master;
+ struct net *net = read_pnet(&ct->ct_net);
+ struct nf_conn_help *help;
int len;
if (family == AF_INET)
@@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
exp->flags = 0;
exp->class = class;
exp->expectfn = NULL;
- exp->helper = NULL;
+
+ help = nfct_help(ct);
+ if (help)
+ helper = rcu_dereference(help->helper);
+
+ rcu_assign_pointer(exp->helper, helper);
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
exp->tuple.src.l3num = family;
exp->tuple.dst.protonum = proto;
@@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
unsigned int h;
int ret = 0;
+ lockdep_nfct_expect_lock_held();
+
if (!master_help) {
ret = -ESHUTDOWN;
goto out;
@@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
nf_ct_expect_insert(expect);
- spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+
return 0;
out:
spin_unlock_bh(&nf_conntrack_expect_lock);
@@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_expect *expect;
struct nf_conntrack_helper *helper;
+ struct net *net = seq_file_net(s);
struct hlist_node *n = v;
char *delim = "";
expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
+ if (!net_eq(nf_ct_exp_net(expect), net))
+ return 0;
+
if (expect->timeout.function)
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (long)(expect->timeout.expires - jiffies)/HZ : 0);
@@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
if (expect->flags & NF_CT_EXPECT_USERSPACE)
seq_printf(s, "%sUSERSPACE", delim);
- helper = rcu_dereference(nfct_help(expect->master)->helper);
+ helper = rcu_dereference(expect->helper);
if (helper) {
seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
if (helper->expect_policy[expect->class].name[0])
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a2a0e22ccee1..3f5c50455b71 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- exp->helper = &nf_conntrack_helper_h245;
+ rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3 : NULL,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
nathook = rcu_dereference(nfct_h323_nat_hook);
@@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_UDP, NULL, &port);
- exp->helper = nf_conntrack_helper_ras;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
@@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- exp->helper = nf_conntrack_helper_q931;
+ rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ceb48c3ca0a4..a715304a53d8 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
- struct nf_conn_help *help = nfct_help(exp->master);
const struct nf_conntrack_helper *me = data;
const struct nf_conntrack_helper *this;
- if (exp->helper == me)
- return true;
-
- this = rcu_dereference_protected(help->helper,
+ this = rcu_dereference_protected(exp->helper,
lockdep_is_held(&nf_conntrack_expect_lock));
return this == me;
}
@@ -419,8 +415,13 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
*/
synchronize_rcu();
- nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
+ nf_ct_expect_iterate_destroy(expect_iter_me, me);
nf_ct_iterate_destroy(unhelp, me);
+
+ /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as
+ * last step, this ensures rcu readers of exp->helper are done.
+ * No need for another synchronize_rcu() here.
+ */
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c156574e1273..a20cd82446c5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -910,8 +910,8 @@ struct ctnetlink_filter {
};
static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
- [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 },
- [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 },
+ [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL),
+ [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL),
};
static int ctnetlink_parse_filter(const struct nlattr *attr,
@@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr,
if (ret)
return ret;
- if (tb[CTA_FILTER_ORIG_FLAGS]) {
+ if (tb[CTA_FILTER_ORIG_FLAGS])
filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]);
- if (filter->orig_flags & ~CTA_FILTER_F_ALL)
- return -EOPNOTSUPP;
- }
- if (tb[CTA_FILTER_REPLY_FLAGS]) {
+ if (tb[CTA_FILTER_REPLY_FLAGS])
filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]);
- if (filter->reply_flags & ~CTA_FILTER_F_ALL)
- return -EOPNOTSUPP;
- }
return 0;
}
@@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
- [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
+ [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK),
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
[CTA_EXPECT_NAT] = { .type = NLA_NESTED },
[CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
@@ -2642,7 +2636,6 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
- struct nf_conntrack_helper *helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask);
@@ -2871,7 +2864,6 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
{
struct nlattr *cda[CTA_EXPECT_MAX+1];
struct nf_conntrack_tuple tuple, mask;
- struct nf_conntrack_helper *helper = NULL;
struct nf_conntrack_expect *exp;
int err;
@@ -2885,17 +2877,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
if (err < 0)
return err;
- if (cda[CTA_EXPECT_HELP_NAME]) {
- const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-
- helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper == NULL)
- return -EOPNOTSUPP;
- }
-
exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
- helper, &tuple, &mask);
+ &tuple, &mask);
if (IS_ERR(exp))
return PTR_ERR(exp);
@@ -3012,7 +2995,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
{
struct nf_conn *master = exp->master;
long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
- struct nf_conn_help *help;
+ struct nf_conntrack_helper *helper;
#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *nest_parms;
struct nf_conntrack_tuple nat_tuple = {};
@@ -3057,15 +3040,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
goto nla_put_failure;
- help = nfct_help(master);
- if (help) {
- struct nf_conntrack_helper *helper;
- helper = rcu_dereference(help->helper);
- if (helper &&
- nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
- goto nla_put_failure;
- }
+ helper = rcu_dereference(exp->helper);
+ if (helper &&
+ nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
+ goto nla_put_failure;
+
expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn);
if (expfn != NULL &&
nla_put_string(skb, CTA_EXPECT_FN, expfn->name))
@@ -3358,31 +3338,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
if (err < 0)
return err;
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
- if (!exp)
+ if (!exp) {
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ kfree_skb(skb2);
return -ENOENT;
+ }
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ kfree_skb(skb2);
return -ENOENT;
}
}
- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb2) {
- nf_ct_expect_put(exp);
- return -ENOMEM;
- }
-
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
exp);
rcu_read_unlock();
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+
if (err <= 0) {
kfree_skb(skb2);
return -ENOMEM;
@@ -3394,12 +3380,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
{
struct nf_conntrack_helper *helper;
- const struct nf_conn_help *m_help;
const char *name = data;
- m_help = nfct_help(exp->master);
-
- helper = rcu_dereference(m_help->helper);
+ helper = rcu_dereference(exp->helper);
if (!helper)
return false;
@@ -3432,22 +3415,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
if (err < 0)
return err;
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
/* bump usage count to 2 */
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
- if (!exp)
+ if (!exp) {
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
+ }
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
}
}
/* after list removal, usage count == 1 */
- spin_lock_bh(&nf_conntrack_expect_lock);
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(info->nlh));
@@ -3530,20 +3517,25 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
- struct nf_conntrack_helper *helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask)
{
- u_int32_t class = 0;
+ struct net *net = read_pnet(&ct->ct_net);
+ struct nf_conntrack_helper *helper;
struct nf_conntrack_expect *exp;
struct nf_conn_help *help;
+ u32 class = 0;
int err;
help = nfct_help(ct);
if (!help)
return ERR_PTR(-EOPNOTSUPP);
- if (cda[CTA_EXPECT_CLASS] && helper) {
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (cda[CTA_EXPECT_CLASS]) {
class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
if (class > helper->expect_class_max)
return ERR_PTR(-EINVAL);
@@ -3573,7 +3565,11 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
exp->class = class;
exp->master = ct;
- exp->helper = helper;
+ write_pnet(&exp->net, net);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ exp->zone = ct->zone;
+#endif
+ rcu_assign_pointer(exp->helper, helper);
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;
@@ -3583,6 +3579,12 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
exp, nf_ct_l3num(ct));
if (err < 0)
goto err_out;
+#if IS_ENABLED(CONFIG_NF_NAT)
+ } else {
+ memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
+ memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
+ exp->dir = 0;
+#endif
}
return exp;
err_out:
@@ -3598,7 +3600,6 @@ ctnetlink_create_expect(struct net *net,
{
struct nf_conntrack_tuple tuple, mask, master_tuple;
struct nf_conntrack_tuple_hash *h = NULL;
- struct nf_conntrack_helper *helper = NULL;
struct nf_conntrack_expect *exp;
struct nf_conn *ct;
int err;
@@ -3624,33 +3625,7 @@ ctnetlink_create_expect(struct net *net,
ct = nf_ct_tuplehash_to_ctrack(h);
rcu_read_lock();
- if (cda[CTA_EXPECT_HELP_NAME]) {
- const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-
- helper = __nf_conntrack_helper_find(helpname, u3,
- nf_ct_protonum(ct));
- if (helper == NULL) {
- rcu_read_unlock();
-#ifdef CONFIG_MODULES
- if (request_module("nfct-helper-%s", helpname) < 0) {
- err = -EOPNOTSUPP;
- goto err_ct;
- }
- rcu_read_lock();
- helper = __nf_conntrack_helper_find(helpname, u3,
- nf_ct_protonum(ct));
- if (helper) {
- err = -EAGAIN;
- goto err_rcu;
- }
- rcu_read_unlock();
-#endif
- err = -EOPNOTSUPP;
- goto err_ct;
- }
- }
-
- exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
+ exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask);
if (IS_ERR(exp)) {
err = PTR_ERR(exp);
goto err_rcu;
@@ -3660,8 +3635,8 @@ ctnetlink_create_expect(struct net *net,
nf_ct_expect_put(exp);
err_rcu:
rcu_read_unlock();
-err_ct:
nf_ct_put(ct);
+
return err;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 0c1d086e96cb..b67426c2189b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1385,9 +1385,9 @@ nla_put_failure:
}
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
- [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
- [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
- [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2),
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE),
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE),
[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
[CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
};
@@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
if (err < 0)
return err;
- if (tb[CTA_PROTOINFO_TCP_STATE] &&
- nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
- return -EINVAL;
-
spin_lock_bh(&ct->lock);
if (tb[CTA_PROTOINFO_TCP_STATE])
ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 4ab5ef71d96d..939502ff7c87 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
if (!exp || exp->master == ct ||
- nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
+ exp->helper != nfct_help(ct)->helper ||
exp->class != class)
break;
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
unsigned int port;
const struct sdp_media_type *t;
int ret = NF_ACCEPT;
+ bool have_rtp_addr = false;
hooks = rcu_dereference(nf_nat_sip_hooks);
@@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
caddr_len = 0;
if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen,
SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
- &matchoff, &matchlen, &caddr) > 0)
+ &matchoff, &matchlen, &caddr) > 0) {
caddr_len = matchlen;
+ memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
+ have_rtp_addr = true;
+ }
mediaoff = sdpoff;
for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) {
@@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
&matchoff, &matchlen, &maddr) > 0) {
maddr_len = matchlen;
memcpy(&rtp_addr, &maddr, sizeof(rtp_addr));
- } else if (caddr_len)
+ have_rtp_addr = true;
+ } else if (caddr_len) {
memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
- else {
+ have_rtp_addr = true;
+ } else {
nf_ct_helper_log(skb, ct, "cannot parse SDP message");
return NF_DROP;
}
@@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
/* Update session connection and owner addresses */
hooks = rcu_dereference(nf_nat_sip_hooks);
- if (hooks && ct->status & IPS_NAT_MASK)
+ if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr)
ret = hooks->sdp_session(skb, protoff, dataoff,
dptr, datalen, sdpoff,
&rtp_addr);
@@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
saddr, &daddr, proto, NULL, &port);
exp->timeout.expires = sip_timeout * HZ;
- exp->helper = helper;
+ rcu_assign_pointer(exp->helper, helper);
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
hooks = rcu_dereference(nf_nat_sip_hooks);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 9b677e116487..93d0aa7f8fcc 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -14,6 +14,8 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
+#define NF_FLOW_RULE_ACTION_MAX 24
+
static struct workqueue_struct *nf_flow_offload_add_wq;
static struct workqueue_struct *nf_flow_offload_del_wq;
static struct workqueue_struct *nf_flow_offload_stats_wq;
@@ -216,7 +218,12 @@ static void flow_offload_mangle(struct flow_action_entry *entry,
static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
- int i = flow_rule->rule->action.num_entries++;
+ int i;
+
+ if (unlikely(flow_rule->rule->action.num_entries >= NF_FLOW_RULE_ACTION_MAX))
+ return NULL;
+
+ i = flow_rule->rule->action.num_entries++;
return &flow_rule->rule->action.entries[i];
}
@@ -234,6 +241,9 @@ static int flow_offload_eth_src(struct net *net,
u32 mask, val;
u16 val16;
+ if (!entry0 || !entry1)
+ return -E2BIG;
+
this_tuple = &flow->tuplehash[dir].tuple;
switch (this_tuple->xmit_type) {
@@ -284,6 +294,9 @@ static int flow_offload_eth_dst(struct net *net,
u8 nud_state;
u16 val16;
+ if (!entry0 || !entry1)
+ return -E2BIG;
+
this_tuple = &flow->tuplehash[dir].tuple;
switch (this_tuple->xmit_type) {
@@ -325,16 +338,19 @@ static int flow_offload_eth_dst(struct net *net,
return 0;
}
-static void flow_offload_ipv4_snat(struct net *net,
- const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_ipv4_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
u32 mask = ~htonl(0xffffffff);
__be32 addr;
u32 offset;
+ if (!entry)
+ return -E2BIG;
+
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
@@ -345,23 +361,27 @@ static void flow_offload_ipv4_snat(struct net *net,
offset = offsetof(struct iphdr, daddr);
break;
default:
- return;
+ return -EOPNOTSUPP;
}
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
&addr, &mask);
+ return 0;
}
-static void flow_offload_ipv4_dnat(struct net *net,
- const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_ipv4_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
u32 mask = ~htonl(0xffffffff);
__be32 addr;
u32 offset;
+ if (!entry)
+ return -E2BIG;
+
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
@@ -372,14 +392,15 @@ static void flow_offload_ipv4_dnat(struct net *net,
offset = offsetof(struct iphdr, saddr);
break;
default:
- return;
+ return -EOPNOTSUPP;
}
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
&addr, &mask);
+ return 0;
}
-static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
+static int flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
unsigned int offset,
const __be32 *addr, const __be32 *mask)
{
@@ -388,15 +409,20 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
entry = flow_action_entry_next(flow_rule);
+ if (!entry)
+ return -E2BIG;
+
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
offset + i * sizeof(u32), &addr[i], mask);
}
+
+ return 0;
}
-static void flow_offload_ipv6_snat(struct net *net,
- const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_ipv6_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
u32 mask = ~htonl(0xffffffff);
const __be32 *addr;
@@ -412,16 +438,16 @@ static void flow_offload_ipv6_snat(struct net *net,
offset = offsetof(struct ipv6hdr, daddr);
break;
default:
- return;
+ return -EOPNOTSUPP;
}
- flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
+ return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}
-static void flow_offload_ipv6_dnat(struct net *net,
- const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_ipv6_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
u32 mask = ~htonl(0xffffffff);
const __be32 *addr;
@@ -437,10 +463,10 @@ static void flow_offload_ipv6_dnat(struct net *net,
offset = offsetof(struct ipv6hdr, saddr);
break;
default:
- return;
+ return -EOPNOTSUPP;
}
- flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
+ return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}
static int flow_offload_l4proto(const struct flow_offload *flow)
@@ -462,15 +488,18 @@ static int flow_offload_l4proto(const struct flow_offload *flow)
return type;
}
-static void flow_offload_port_snat(struct net *net,
- const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_port_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
u32 mask, port;
u32 offset;
+ if (!entry)
+ return -E2BIG;
+
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
@@ -485,22 +514,26 @@ static void flow_offload_port_snat(struct net *net,
mask = ~htonl(0xffff);
break;
default:
- return;
+ return -EOPNOTSUPP;
}
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
&port, &mask);
+ return 0;
}
-static void flow_offload_port_dnat(struct net *net,
- const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_port_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
u32 mask, port;
u32 offset;
+ if (!entry)
+ return -E2BIG;
+
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
@@ -515,20 +548,24 @@ static void flow_offload_port_dnat(struct net *net,
mask = ~htonl(0xffff0000);
break;
default:
- return;
+ return -EOPNOTSUPP;
}
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
&port, &mask);
+ return 0;
}
-static void flow_offload_ipv4_checksum(struct net *net,
- const struct flow_offload *flow,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_ipv4_checksum(struct net *net,
+ const struct flow_offload *flow,
+ struct nf_flow_rule *flow_rule)
{
u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ if (!entry)
+ return -E2BIG;
+
entry->id = FLOW_ACTION_CSUM;
entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
@@ -540,12 +577,14 @@ static void flow_offload_ipv4_checksum(struct net *net,
entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
break;
}
+
+ return 0;
}
-static void flow_offload_redirect(struct net *net,
- const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_redirect(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
const struct flow_offload_tuple *this_tuple, *other_tuple;
struct flow_action_entry *entry;
@@ -563,21 +602,28 @@ static void flow_offload_redirect(struct net *net,
ifindex = other_tuple->iifidx;
break;
default:
- return;
+ return -EOPNOTSUPP;
}
dev = dev_get_by_index(net, ifindex);
if (!dev)
- return;
+ return -ENODEV;
entry = flow_action_entry_next(flow_rule);
+ if (!entry) {
+ dev_put(dev);
+ return -E2BIG;
+ }
+
entry->id = FLOW_ACTION_REDIRECT;
entry->dev = dev;
+
+ return 0;
}
-static void flow_offload_encap_tunnel(const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_encap_tunnel(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
const struct flow_offload_tuple *this_tuple;
struct flow_action_entry *entry;
@@ -585,7 +631,7 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow,
this_tuple = &flow->tuplehash[dir].tuple;
if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
- return;
+ return 0;
dst = this_tuple->dst_cache;
if (dst && dst->lwtstate) {
@@ -594,15 +640,19 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow,
tun_info = lwt_tun_info(dst->lwtstate);
if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
entry = flow_action_entry_next(flow_rule);
+ if (!entry)
+ return -E2BIG;
entry->id = FLOW_ACTION_TUNNEL_ENCAP;
entry->tunnel = tun_info;
}
}
+
+ return 0;
}
-static void flow_offload_decap_tunnel(const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+static int flow_offload_decap_tunnel(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
const struct flow_offload_tuple *other_tuple;
struct flow_action_entry *entry;
@@ -610,7 +660,7 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow,
other_tuple = &flow->tuplehash[!dir].tuple;
if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
- return;
+ return 0;
dst = other_tuple->dst_cache;
if (dst && dst->lwtstate) {
@@ -619,9 +669,13 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow,
tun_info = lwt_tun_info(dst->lwtstate);
if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
entry = flow_action_entry_next(flow_rule);
+ if (!entry)
+ return -E2BIG;
entry->id = FLOW_ACTION_TUNNEL_DECAP;
}
}
+
+ return 0;
}
static int
@@ -633,8 +687,9 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
const struct flow_offload_tuple *tuple;
int i;
- flow_offload_decap_tunnel(flow, dir, flow_rule);
- flow_offload_encap_tunnel(flow, dir, flow_rule);
+ if (flow_offload_decap_tunnel(flow, dir, flow_rule) < 0 ||
+ flow_offload_encap_tunnel(flow, dir, flow_rule) < 0)
+ return -1;
if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
@@ -650,6 +705,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
entry = flow_action_entry_next(flow_rule);
+ if (!entry)
+ return -1;
entry->id = FLOW_ACTION_VLAN_POP;
}
}
@@ -663,6 +720,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
continue;
entry = flow_action_entry_next(flow_rule);
+ if (!entry)
+ return -1;
switch (other_tuple->encap[i].proto) {
case htons(ETH_P_PPP_SES):
@@ -688,18 +747,22 @@ int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
return -1;
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
- flow_offload_ipv4_snat(net, flow, dir, flow_rule);
- flow_offload_port_snat(net, flow, dir, flow_rule);
+ if (flow_offload_ipv4_snat(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_port_snat(net, flow, dir, flow_rule) < 0)
+ return -1;
}
if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
- flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
- flow_offload_port_dnat(net, flow, dir, flow_rule);
+ if (flow_offload_ipv4_dnat(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_port_dnat(net, flow, dir, flow_rule) < 0)
+ return -1;
}
if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
test_bit(NF_FLOW_DNAT, &flow->flags))
- flow_offload_ipv4_checksum(net, flow, flow_rule);
+ if (flow_offload_ipv4_checksum(net, flow, flow_rule) < 0)
+ return -1;
- flow_offload_redirect(net, flow, dir, flow_rule);
+ if (flow_offload_redirect(net, flow, dir, flow_rule) < 0)
+ return -1;
return 0;
}
@@ -713,22 +776,23 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
return -1;
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
- flow_offload_ipv6_snat(net, flow, dir, flow_rule);
- flow_offload_port_snat(net, flow, dir, flow_rule);
+ if (flow_offload_ipv6_snat(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_port_snat(net, flow, dir, flow_rule) < 0)
+ return -1;
}
if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
- flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
- flow_offload_port_dnat(net, flow, dir, flow_rule);
+ if (flow_offload_ipv6_dnat(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_port_dnat(net, flow, dir, flow_rule) < 0)
+ return -1;
}
- flow_offload_redirect(net, flow, dir, flow_rule);
+ if (flow_offload_redirect(net, flow, dir, flow_rule) < 0)
+ return -1;
return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
-#define NF_FLOW_RULE_ACTION_MAX 16
-
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
const struct flow_offload_work *offload,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3922cff1bb3d..8c42247a176c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -11667,8 +11667,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
switch (data->verdict.code) {
case NF_ACCEPT:
case NF_DROP:
- case NF_QUEUE:
- break;
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
@@ -11703,6 +11701,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.chain = chain;
break;
+ case NF_QUEUE:
+ /* The nft_queue expression is used for this purpose, an
+ * immediate NF_QUEUE verdict should not ever be seen here.
+ */
+ fallthrough;
default:
return -EINVAL;
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b35a90955e2e..f80978c06fa0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log,
if (data_len) {
struct nlattr *nla;
- int size = nla_attr_size(data_len);
- if (skb_tailroom(inst->skb) < nla_total_size(data_len))
+ nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len);
+ if (!nla)
goto nla_put_failure;
- nla = skb_put(inst->skb, nla_total_size(data_len));
- nla->nla_type = NFULA_PAYLOAD;
- nla->nla_len = size;
-
if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
BUG();
}
@@ -730,7 +726,7 @@ nfulnl_log_packet(struct net *net,
+ nla_total_size(plen) /* prefix */
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp))
- + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */
+ + nlmsg_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */
if (in && skb_mac_header_was_set(skb)) {
size += nla_total_size(skb->dev->hard_header_len)
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 7ff90325c97f..6395982e4d95 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
- return b;
+ ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index fe8bd497d74a..737c339decd0 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -572,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals)
return array;
}
-#define NFT_ARRAY_EXTRA_SIZE 10240
-
/* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider
* packed representation coming from userspace for anonymous sets too.
*/
static u32 nft_array_elems(const struct nft_set *set)
{
- u32 nelems = atomic_read(&set->nelems);
+ u32 nelems = atomic_read(&set->nelems) - set->ndeact;
/* Adjacent intervals are represented with a single start element in
* anonymous sets, use the current element counter as is.
@@ -595,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set)
return (nelems / 2) + 2;
}
-static int nft_array_may_resize(const struct nft_set *set)
+#define NFT_ARRAY_INITIAL_SIZE 1024
+#define NFT_ARRAY_INITIAL_ANON_SIZE 16
+#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval))
+
+static int nft_array_may_resize(const struct nft_set *set, bool flush)
{
- u32 nelems = nft_array_elems(set), new_max_intervals;
+ u32 initial_intervals, max_intervals, new_max_intervals, delta;
+ u32 shrinked_max_intervals, nelems = nft_array_elems(set);
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_array *array;
- if (!priv->array_next) {
- array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE);
- if (!array)
- return -ENOMEM;
+ if (nft_set_is_anonymous(set))
+ initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE;
+ else
+ initial_intervals = NFT_ARRAY_INITIAL_SIZE;
+
+ if (priv->array_next) {
+ max_intervals = priv->array_next->max_intervals;
+ new_max_intervals = priv->array_next->max_intervals;
+ } else {
+ if (priv->array) {
+ max_intervals = priv->array->max_intervals;
+ new_max_intervals = priv->array->max_intervals;
+ } else {
+ max_intervals = 0;
+ new_max_intervals = initial_intervals;
+ }
+ }
- priv->array_next = array;
+ if (nft_set_is_anonymous(set))
+ goto maybe_grow;
+
+ if (flush) {
+ /* Set flush just started, nelems still report elements.*/
+ nelems = 0;
+ new_max_intervals = NFT_ARRAY_INITIAL_SIZE;
+ goto realloc_array;
}
- if (nelems < priv->array_next->max_intervals)
- return 0;
+ if (check_add_overflow(new_max_intervals, new_max_intervals,
+ &shrinked_max_intervals))
+ return -EOVERFLOW;
+
+ shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3);
- new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE;
- if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
+ if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE &&
+ nelems < shrinked_max_intervals) {
+ new_max_intervals = shrinked_max_intervals;
+ goto realloc_array;
+ }
+maybe_grow:
+ if (nelems > new_max_intervals) {
+ if (nft_set_is_anonymous(set) &&
+ new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) {
+ new_max_intervals <<= 1;
+ } else {
+ delta = new_max_intervals >> 1;
+ if (check_add_overflow(new_max_intervals, delta,
+ &new_max_intervals))
+ return -EOVERFLOW;
+ }
+ }
+
+realloc_array:
+ if (WARN_ON_ONCE(nelems > new_max_intervals))
return -ENOMEM;
+ if (priv->array_next) {
+ if (max_intervals == new_max_intervals)
+ return 0;
+
+ if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
+ return -ENOMEM;
+ } else {
+ array = nft_array_alloc(new_max_intervals);
+ if (!array)
+ return -ENOMEM;
+
+ priv->array_next = array;
+ }
+
return 0;
}
@@ -630,7 +688,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
nft_rbtree_maybe_reset_start_cookie(priv, tstamp);
- if (nft_array_may_resize(set) < 0)
+ if (nft_array_may_resize(set, false) < 0)
return -ENOMEM;
do {
@@ -741,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_null(set, this))
priv->start_rbe_cookie = 0;
- if (nft_array_may_resize(set) < 0)
+ if (nft_array_may_resize(set, false) < 0)
return NULL;
while (parent != NULL) {
@@ -811,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
switch (iter->type) {
case NFT_ITER_UPDATE_CLONE:
- if (nft_array_may_resize(set) < 0) {
+ if (nft_array_may_resize(set, true) < 0) {
iter->err = -ENOMEM;
break;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e594b3b7ad82..b39017c80548 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -501,6 +501,17 @@ int xt_check_match(struct xt_mtchk_param *par,
par->match->table, par->table);
return -EINVAL;
}
+
+ /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with
+ * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP
+ * support.
+ */
+ if (par->family == NFPROTO_ARP &&
+ par->match->family != NFPROTO_ARP) {
+ pr_info_ratelimited("%s_tables: %s match: not valid for this family\n",
+ xt_prefix[par->family], par->match->name);
+ return -EINVAL;
+ }
if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
char used[64], allow[64];
@@ -1016,6 +1027,18 @@ int xt_check_target(struct xt_tgchk_param *par,
par->target->table, par->table);
return -EINVAL;
}
+
+ /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with
+ * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP
+ * support.
+ */
+ if (par->family == NFPROTO_ARP &&
+ par->target->family != NFPROTO_ARP) {
+ pr_info_ratelimited("%s_tables: %s target: not valid for this family\n",
+ xt_prefix[par->family], par->target->name);
+ return -EINVAL;
+ }
+
if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
char used[64], allow[64];
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index c437fbd59ec1..43d2ae2be628 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -65,6 +65,9 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
info->priv = NULL;
if (info->has_path) {
+ if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path))
+ return -ENAMETOOLONG;
+
cgrp = cgroup_get_from_path(info->path);
if (IS_ERR(cgrp)) {
pr_info_ratelimited("invalid path, errno=%ld\n",
@@ -102,6 +105,9 @@ static int cgroup_mt_check_v2(const struct xt_mtchk_param *par)
info->priv = NULL;
if (info->has_path) {
+ if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path))
+ return -ENAMETOOLONG;
+
cgrp = cgroup_get_from_path(info->path);
if (IS_ERR(cgrp)) {
pr_info_ratelimited("invalid path, errno=%ld\n",
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 72324bd976af..b1d736c15fcb 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -91,6 +91,11 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
goto err1;
}
+ if (strnlen(info->name1, sizeof(info->name1)) >= sizeof(info->name1))
+ return -ENAMETOOLONG;
+ if (strnlen(info->name2, sizeof(info->name2)) >= sizeof(info->name2))
+ return -ENAMETOOLONG;
+
ret = -ENOENT;
est1 = xt_rateest_lookup(par->net, info->name1);
if (!est1)
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 43d871525dbc..5f46c4b5720f 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -579,8 +579,7 @@ static int nci_close_device(struct nci_dev *ndev)
skb_queue_purge(&ndev->rx_q);
skb_queue_purge(&ndev->tx_q);
- /* Flush RX and TX wq */
- flush_workqueue(ndev->rx_wq);
+ /* Flush TX wq, RX wq flush can't be under the lock */
flush_workqueue(ndev->tx_wq);
/* Reset device */
@@ -592,13 +591,13 @@ static int nci_close_device(struct nci_dev *ndev)
msecs_to_jiffies(NCI_RESET_TIMEOUT));
/* After this point our queues are empty
- * and no works are scheduled.
+ * rx work may be running but will see that NCI_UP was cleared
*/
ndev->ops->close(ndev);
clear_bit(NCI_INIT, &ndev->flags);
- /* Flush cmd wq */
+ /* Flush cmd and tx wq */
flush_workqueue(ndev->cmd_wq);
timer_delete_sync(&ndev->cmd_timer);
@@ -613,6 +612,9 @@ static int nci_close_device(struct nci_dev *ndev)
mutex_unlock(&ndev->req_lock);
+ /* rx_work may take req_lock via nci_deactivate_target */
+ flush_workqueue(ndev->rx_wq);
+
return 0;
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 67fbf6e48a30..13052408a132 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2953,6 +2953,8 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_MPLS:
if (!eth_p_mpls(eth_type))
return -EINVAL;
+ if (key_len != sizeof(struct ovs_key_mpls))
+ return -EINVAL;
break;
case OVS_KEY_ATTR_SCTP:
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6574f9bcdc02..12055af832dc 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -151,11 +151,15 @@ static void vport_netdev_free(struct rcu_head *rcu)
void ovs_netdev_detach_dev(struct vport *vport)
{
ASSERT_RTNL();
- vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(vport->dev);
netdev_upper_dev_unlink(vport->dev,
netdev_master_upper_dev_get(vport->dev));
dev_set_promiscuity(vport->dev, -1);
+
+ /* paired with smp_mb() in netdev_destroy() */
+ smp_wmb();
+
+ vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
}
static void netdev_destroy(struct vport *vport)
@@ -174,6 +178,9 @@ static void netdev_destroy(struct vport *vport)
rtnl_unlock();
}
+ /* paired with smp_wmb() in ovs_netdev_detach_dev() */
+ smp_mb();
+
call_rcu(&vport->rcu, vport_netdev_free);
}
@@ -189,8 +196,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
*/
if (vport->dev->reg_state == NETREG_REGISTERED)
rtnl_delete_link(vport->dev, 0, NULL);
- netdev_put(vport->dev, &vport->dev_tracker);
- vport->dev = NULL;
rtnl_unlock();
call_rcu(&vport->rcu, vport_netdev_free);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 72d0935139f0..bb2d88205e5a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3135,6 +3135,7 @@ static int packet_release(struct socket *sock)
spin_lock(&po->bind_lock);
unregister_prot_hook(sk, false);
+ WRITE_ONCE(po->num, 0);
packet_cached_dev_reset(po);
if (po->prot_hook.dev) {
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index 55fd2dd37588..d77e9c8212da 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -118,7 +118,7 @@ static DEFINE_XARRAY_ALLOC(qrtr_ports);
* @ep: endpoint
* @ref: reference count for node
* @nid: node id
- * @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port
+ * @qrtr_tx_flow: xarray of qrtr_tx_flow, keyed by node << 32 | port
* @qrtr_tx_lock: lock for qrtr_tx_flow inserts
* @rx_queue: receive queue
* @item: list item for broadcast list
@@ -129,7 +129,7 @@ struct qrtr_node {
struct kref ref;
unsigned int nid;
- struct radix_tree_root qrtr_tx_flow;
+ struct xarray qrtr_tx_flow;
struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */
struct sk_buff_head rx_queue;
@@ -172,6 +172,7 @@ static void __qrtr_node_release(struct kref *kref)
struct qrtr_tx_flow *flow;
unsigned long flags;
void __rcu **slot;
+ unsigned long index;
spin_lock_irqsave(&qrtr_nodes_lock, flags);
/* If the node is a bridge for other nodes, there are possibly
@@ -189,11 +190,9 @@ static void __qrtr_node_release(struct kref *kref)
skb_queue_purge(&node->rx_queue);
/* Free tx flow counters */
- radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
- flow = *slot;
- radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot);
+ xa_for_each(&node->qrtr_tx_flow, index, flow)
kfree(flow);
- }
+ xa_destroy(&node->qrtr_tx_flow);
kfree(node);
}
@@ -228,9 +227,7 @@ static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb)
key = remote_node << 32 | remote_port;
- rcu_read_lock();
- flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
- rcu_read_unlock();
+ flow = xa_load(&node->qrtr_tx_flow, key);
if (flow) {
spin_lock(&flow->resume_tx.lock);
flow->pending = 0;
@@ -269,12 +266,13 @@ static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port,
return 0;
mutex_lock(&node->qrtr_tx_lock);
- flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
+ flow = xa_load(&node->qrtr_tx_flow, key);
if (!flow) {
flow = kzalloc_obj(*flow);
if (flow) {
init_waitqueue_head(&flow->resume_tx);
- if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) {
+ if (xa_err(xa_store(&node->qrtr_tx_flow, key, flow,
+ GFP_KERNEL))) {
kfree(flow);
flow = NULL;
}
@@ -326,9 +324,7 @@ static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node,
unsigned long key = (u64)dest_node << 32 | dest_port;
struct qrtr_tx_flow *flow;
- rcu_read_lock();
- flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
- rcu_read_unlock();
+ flow = xa_load(&node->qrtr_tx_flow, key);
if (flow) {
spin_lock_irq(&flow->resume_tx.lock);
flow->tx_failed = 1;
@@ -599,7 +595,7 @@ int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid)
node->nid = QRTR_EP_NID_AUTO;
node->ep = ep;
- INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL);
+ xa_init(&node->qrtr_tx_flow);
mutex_init(&node->qrtr_tx_lock);
qrtr_node_assign(node, nid);
@@ -627,6 +623,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
struct qrtr_tx_flow *flow;
struct sk_buff *skb;
unsigned long flags;
+ unsigned long index;
void __rcu **slot;
mutex_lock(&node->ep_lock);
@@ -649,10 +646,8 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
/* Wake up any transmitters waiting for resume-tx from the node */
mutex_lock(&node->qrtr_tx_lock);
- radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
- flow = *slot;
+ xa_for_each(&node->qrtr_tx_flow, index, flow)
wake_up_interruptible_all(&flow->resume_tx);
- }
mutex_unlock(&node->qrtr_tx_lock);
qrtr_node_release(node);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 077f7041df15..2cfec252eeac 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -604,8 +604,13 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
return ibmr;
}
- if (conn)
+ if (conn) {
ic = conn->c_transport_data;
+ if (!ic || !ic->i_cm_id || !ic->i_cm_id->qp) {
+ ret = -ENODEV;
+ goto out;
+ }
+ }
if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
ret = -ENODEV;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 4829c27446e3..20f7f9ee0b35 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2969,6 +2969,7 @@ static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
tcm->tcm_handle = 0;
+ tcm->tcm_info = 0;
if (block->q) {
tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
tcm->tcm_parent = block->q->handle;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 339c664beff6..ab364e4e4686 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -503,8 +503,16 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
}
if (TC_H_MAJ(baseclass) == 0) {
- struct Qdisc *q = tcf_block_q(tp->chain->block);
+ struct tcf_block *block = tp->chain->block;
+ struct Qdisc *q;
+ if (tcf_block_shared(block)) {
+ NL_SET_ERR_MSG(extack,
+ "Must specify baseclass when attaching flow filter to block");
+ goto err2;
+ }
+
+ q = tcf_block_q(block);
baseclass = TC_H_MAKE(q->handle, baseclass);
}
if (TC_H_MIN(baseclass) == 0)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index be81c108179d..23884ef8b80c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -247,8 +247,18 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
struct nlattr *tb[TCA_FW_MAX + 1];
int err;
- if (!opt)
- return handle ? -EINVAL : 0; /* Succeed if it is old method. */
+ if (!opt) {
+ if (handle)
+ return -EINVAL;
+
+ if (tcf_block_shared(tp->chain->block)) {
+ NL_SET_ERR_MSG(extack,
+ "Must specify mark when attaching fw filter to block");
+ return -EINVAL;
+ }
+
+ return 0; /* Succeed if it is old method. */
+ }
err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy,
NULL);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b5657ffbbf84..83b2ca2e37fc 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -555,7 +555,7 @@ static void
rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
{
u64 y1, y2, dx, dy;
- u32 dsm;
+ u64 dsm;
if (isc->sm1 <= isc->sm2) {
/* service curve is convex */
@@ -598,7 +598,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
*/
dx = (y1 - y) << SM_SHIFT;
dsm = isc->sm1 - isc->sm2;
- do_div(dx, dsm);
+ dx = div64_u64(dx, dsm);
/*
* check if (x, y1) belongs to the 1st segment of rtsc.
* if so, add the offset.
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5de1c932944a..20df1c08b1e9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -519,8 +519,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
goto finish_segs;
}
- skb->data[get_random_u32_below(skb_headlen(skb))] ^=
- 1<<get_random_u32_below(8);
+ if (skb_headlen(skb))
+ skb->data[get_random_u32_below(skb_headlen(skb))] ^=
+ 1 << get_random_u32_below(8);
}
if (unlikely(q->t_len >= sch->limit)) {
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index d833e36f7fd4..c1d9b923938d 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -135,9 +135,16 @@ out:
sock_put(sk);
}
+static bool smc_rx_pipe_buf_get(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ /* smc_spd_priv in buf->private is not shareable; disallow cloning. */
+ return false;
+}
+
static const struct pipe_buf_operations smc_pipe_ops = {
.release = smc_rx_pipe_buf_release,
- .get = generic_pipe_buf_get
+ .get = smc_rx_pipe_buf_get,
};
static void smc_rx_spd_release(struct splice_pipe_desc *spd,
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 5fe07f110fe8..dd9dda759bbb 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -246,6 +246,7 @@ static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx)
crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
atomic_inc(&ctx->decrypt_pending);
+ __skb_queue_purge(&ctx->async_hold);
return ctx->async_wait.err;
}
@@ -2225,7 +2226,6 @@ recv_end:
/* Wait for all previously submitted records to be decrypted */
ret = tls_decrypt_async_wait(ctx);
- __skb_queue_purge(&ctx->async_hold);
if (ret) {
if (err >= 0 || err == -EINPROGRESS)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2f7d94d682cb..d912ed2f012a 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -2928,6 +2928,7 @@ static void vsock_net_init(struct net *net)
net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns);
net->vsock.child_ns_mode = net->vsock.mode;
+ net->vsock.child_ns_mode_locked = 0;
}
static __net_init int vsock_sysctl_init_net(struct net *net)
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index b981a4828d08..e47ebd8acd21 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -34,6 +34,10 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
struct sk_buff *skbo, *skbn = skb;
struct x25_sock *x25 = x25_sk(sk);
+ /* make sure we don't overflow */
+ if (x25->fraglen + skb->len > USHRT_MAX)
+ return 1;
+
if (more) {
x25->fraglen += skb->len;
skb_queue_tail(&x25->fragment_queue, skb);
@@ -44,10 +48,9 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
if (x25->fraglen > 0) { /* End of fragment */
int len = x25->fraglen + skb->len;
- if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL){
- kfree_skb(skb);
+ skbn = alloc_skb(len, GFP_ATOMIC);
+ if (!skbn)
return 1;
- }
skb_queue_tail(&x25->fragment_queue, skb);
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 0285aaa1e93c..159708d9ad20 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -40,6 +40,7 @@ void x25_clear_queues(struct sock *sk)
skb_queue_purge(&x25->interrupt_in_queue);
skb_queue_purge(&x25->interrupt_out_queue);
skb_queue_purge(&x25->fragment_queue);
+ x25->fraglen = 0;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 4ed346e682c7..dc1312ed5a09 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -75,7 +75,10 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
spin_lock_bh(&xfrm_input_afinfo_lock);
if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) {
- if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo))
+ const struct xfrm_input_afinfo *cur;
+
+ cur = rcu_access_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family]);
+ if (unlikely(cur != afinfo))
err = -EINVAL;
else
RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL);
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
index 050a82101ca5..97bc979e55ba 100644
--- a/net/xfrm/xfrm_iptfs.c
+++ b/net/xfrm/xfrm_iptfs.c
@@ -901,6 +901,12 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
} else {
+ if (skb_linearize(newskb)) {
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINBUFFERERROR);
+ goto abandon;
+ }
+
/* copy fragment data into newskb */
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
copylen)) {
@@ -991,6 +997,11 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
iplen = be16_to_cpu(iph->tot_len);
iphlen = iph->ihl << 2;
+ if (iplen < iphlen || iphlen < sizeof(*iph)) {
+ XFRM_INC_STATS(net,
+ LINUX_MIB_XFRMINHDRERROR);
+ goto done;
+ }
protocol = cpu_to_be16(ETH_P_IP);
XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos;
} else if (iph->version == 0x6) {
@@ -2653,9 +2664,6 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig)
if (!xtfs)
return -ENOMEM;
- x->mode_data = xtfs;
- xtfs->x = x;
-
xtfs->ra_newskb = NULL;
if (xtfs->cfg.reorder_win_size) {
xtfs->w_saved = kzalloc_objs(*xtfs->w_saved,
@@ -2666,6 +2674,9 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig)
}
}
+ x->mode_data = xtfs;
+ xtfs->x = x;
+
return 0;
}
diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c
index ebf95d48e86c..1856beee0149 100644
--- a/net/xfrm/xfrm_nat_keepalive.c
+++ b/net/xfrm/xfrm_nat_keepalive.c
@@ -261,7 +261,7 @@ int __net_init xfrm_nat_keepalive_net_init(struct net *net)
int xfrm_nat_keepalive_net_fini(struct net *net)
{
- cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work);
+ disable_delayed_work_sync(&net->xfrm.nat_keepalive_work);
return 0;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9e1a522ab1c5..362939aa56cf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4156,7 +4156,7 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
int i;
for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) {
- if (xfrm_policy_afinfo[i] != afinfo)
+ if (rcu_access_pointer(xfrm_policy_afinfo[i]) != afinfo)
continue;
RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL);
break;
@@ -4242,7 +4242,7 @@ static int __net_init xfrm_policy_init(struct net *net)
net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
htab = &net->xfrm.policy_bydst[dir];
- htab->table = xfrm_hash_alloc(sz);
+ rcu_assign_pointer(htab->table, xfrm_hash_alloc(sz));
if (!htab->table)
goto out_bydst;
htab->hmask = hmask;
@@ -4269,7 +4269,7 @@ out_bydst:
struct xfrm_policy_hash *htab;
htab = &net->xfrm.policy_bydst[dir];
- xfrm_hash_free(htab->table, sz);
+ xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz);
}
xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
@@ -4282,6 +4282,8 @@ static void xfrm_policy_fini(struct net *net)
unsigned int sz;
int dir;
+ disable_work_sync(&net->xfrm.policy_hthresh.work);
+
flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
@@ -4295,8 +4297,8 @@ static void xfrm_policy_fini(struct net *net)
htab = &net->xfrm.policy_bydst[dir];
sz = (htab->hmask + 1) * sizeof(struct hlist_head);
- WARN_ON(!hlist_empty(htab->table));
- xfrm_hash_free(htab->table, sz);
+ WARN_ON(!hlist_empty(rcu_dereference_protected(htab->table, true)));
+ xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz);
}
sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 98b362d51836..1748d374abca 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -53,7 +53,7 @@ static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
static HLIST_HEAD(xfrm_state_gc_list);
static HLIST_HEAD(xfrm_state_dev_gc_list);
-static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+static inline bool xfrm_state_hold_rcu(struct xfrm_state *x)
{
return refcount_inc_not_zero(&x->refcnt);
}
@@ -870,7 +870,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
for (i = 0; i <= net->xfrm.state_hmask; i++) {
struct xfrm_state *x;
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) {
if (xfrm_id_proto_match(x->id.proto, proto) &&
(err = security_xfrm_state_delete(x)) != 0) {
xfrm_audit_state_delete(x, 0, task_valid);
@@ -891,7 +891,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
struct xfrm_state *x;
struct xfrm_dev_offload *xso;
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) {
xso = &x->xso;
if (xso->dev == dev &&
@@ -931,7 +931,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
for (i = 0; i <= net->xfrm.state_hmask; i++) {
struct xfrm_state *x;
restart:
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) {
if (!xfrm_state_kern(x) &&
xfrm_id_proto_match(x->id.proto, proto)) {
xfrm_state_hold(x);
@@ -973,7 +973,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali
err = -ESRCH;
for (i = 0; i <= net->xfrm.state_hmask; i++) {
restart:
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) {
xso = &x->xso;
if (!xfrm_state_kern(x) && xso->dev == dev) {
@@ -1563,23 +1563,23 @@ found:
list_add(&x->km.all, &net->xfrm.state_all);
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
XFRM_STATE_INSERT(bydst, &x->bydst,
- net->xfrm.state_bydst + h,
+ xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h,
x->xso.type);
h = xfrm_src_hash(net, daddr, saddr, encap_family);
XFRM_STATE_INSERT(bysrc, &x->bysrc,
- net->xfrm.state_bysrc + h,
+ xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h,
x->xso.type);
INIT_HLIST_NODE(&x->state_cache);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
XFRM_STATE_INSERT(byspi, &x->byspi,
- net->xfrm.state_byspi + h,
+ xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h,
x->xso.type);
}
if (x->km.seq) {
h = xfrm_seq_hash(net, x->km.seq);
XFRM_STATE_INSERT(byseq, &x->byseq,
- net->xfrm.state_byseq + h,
+ xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h,
x->xso.type);
}
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
@@ -1652,7 +1652,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
spin_lock_bh(&net->xfrm.xfrm_state_lock);
h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
(mark & x->mark.m) == x->mark.v &&
@@ -1703,18 +1703,12 @@ static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 sp
struct xfrm_state *x;
unsigned int i;
- rcu_read_lock();
for (i = 0; i <= net->xfrm.state_hmask; i++) {
- hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) {
- if (x->id.spi == spi && x->id.proto == proto) {
- if (!xfrm_state_hold_rcu(x))
- continue;
- rcu_read_unlock();
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byspi, net) + i, byspi) {
+ if (x->id.spi == spi && x->id.proto == proto)
return x;
- }
}
}
- rcu_read_unlock();
return NULL;
}
@@ -1730,25 +1724,29 @@ static void __xfrm_state_insert(struct xfrm_state *x)
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
- XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
+ XFRM_STATE_INSERT(bydst, &x->bydst,
+ xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h,
x->xso.type);
h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
- XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
+ XFRM_STATE_INSERT(bysrc, &x->bysrc,
+ xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h,
x->xso.type);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
x->props.family);
- XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
+ XFRM_STATE_INSERT(byspi, &x->byspi,
+ xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h,
x->xso.type);
}
if (x->km.seq) {
h = xfrm_seq_hash(net, x->km.seq);
- XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h,
+ XFRM_STATE_INSERT(byseq, &x->byseq,
+ xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h,
x->xso.type);
}
@@ -1775,7 +1773,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
u32 cpu_id = xnew->pcpu_num;
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
x->if_id == if_id &&
@@ -1811,7 +1809,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
struct xfrm_state *x;
u32 mark = m->v & m->m;
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) {
if (x->props.reqid != reqid ||
x->props.mode != mode ||
x->props.family != family ||
@@ -1868,10 +1866,12 @@ static struct xfrm_state *__find_acq_core(struct net *net,
ktime_set(net->xfrm.sysctl_acq_expires, 0),
HRTIMER_MODE_REL_SOFT);
list_add(&x->km.all, &net->xfrm.state_all);
- XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
+ XFRM_STATE_INSERT(bydst, &x->bydst,
+ xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h,
x->xso.type);
h = xfrm_src_hash(net, daddr, saddr, family);
- XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
+ XFRM_STATE_INSERT(bysrc, &x->bysrc,
+ xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h,
x->xso.type);
net->xfrm.state_num++;
@@ -2091,7 +2091,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
if (m->reqid) {
h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
m->reqid, m->old_family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) {
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
@@ -2110,7 +2110,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
} else {
h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
m->old_family);
- hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, bysrc) {
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
@@ -2264,6 +2264,7 @@ out:
err = 0;
x->km.state = XFRM_STATE_DEAD;
+ xfrm_dev_state_delete(x);
__xfrm_state_put(x);
}
@@ -2312,7 +2313,7 @@ void xfrm_state_update_stats(struct net *net)
spin_lock_bh(&net->xfrm.xfrm_state_lock);
for (i = 0; i <= net->xfrm.state_hmask; i++) {
- hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst)
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst)
xfrm_dev_state_update_stats(x);
}
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
@@ -2503,7 +2504,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
unsigned int h = xfrm_seq_hash(net, seq);
struct xfrm_state *x;
- hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
+ hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, byseq) {
if (x->km.seq == seq &&
(mark & x->mark.m) == x->mark.v &&
x->pcpu_num == pcpu_num &&
@@ -2602,12 +2603,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
if (!x0) {
x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family);
- XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type);
+ XFRM_STATE_INSERT(byspi, &x->byspi,
+ xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h,
+ x->xso.type);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = 0;
goto unlock;
}
- xfrm_state_put(x0);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
next:
@@ -3258,6 +3260,7 @@ EXPORT_SYMBOL(xfrm_init_state);
int __net_init xfrm_state_init(struct net *net)
{
+ struct hlist_head *ndst, *nsrc, *nspi, *nseq;
unsigned int sz;
if (net_eq(net, &init_net))
@@ -3268,18 +3271,25 @@ int __net_init xfrm_state_init(struct net *net)
sz = sizeof(struct hlist_head) * 8;
- net->xfrm.state_bydst = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_bydst)
+ ndst = xfrm_hash_alloc(sz);
+ if (!ndst)
goto out_bydst;
- net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_bysrc)
+ rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+
+ nsrc = xfrm_hash_alloc(sz);
+ if (!nsrc)
goto out_bysrc;
- net->xfrm.state_byspi = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_byspi)
+ rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+
+ nspi = xfrm_hash_alloc(sz);
+ if (!nspi)
goto out_byspi;
- net->xfrm.state_byseq = xfrm_hash_alloc(sz);
- if (!net->xfrm.state_byseq)
+ rcu_assign_pointer(net->xfrm.state_byspi, nspi);
+
+ nseq = xfrm_hash_alloc(sz);
+ if (!nseq)
goto out_byseq;
+ rcu_assign_pointer(net->xfrm.state_byseq, nseq);
net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
if (!net->xfrm.state_cache_input)
@@ -3295,17 +3305,19 @@ int __net_init xfrm_state_init(struct net *net)
return 0;
out_state_cache_input:
- xfrm_hash_free(net->xfrm.state_byseq, sz);
+ xfrm_hash_free(nseq, sz);
out_byseq:
- xfrm_hash_free(net->xfrm.state_byspi, sz);
+ xfrm_hash_free(nspi, sz);
out_byspi:
- xfrm_hash_free(net->xfrm.state_bysrc, sz);
+ xfrm_hash_free(nsrc, sz);
out_bysrc:
- xfrm_hash_free(net->xfrm.state_bydst, sz);
+ xfrm_hash_free(ndst, sz);
out_bydst:
return -ENOMEM;
}
+#define xfrm_state_deref_netexit(table) \
+ rcu_dereference_protected((table), true /* netns is going away */)
void xfrm_state_fini(struct net *net)
{
unsigned int sz;
@@ -3318,17 +3330,17 @@ void xfrm_state_fini(struct net *net)
WARN_ON(!list_empty(&net->xfrm.state_all));
for (i = 0; i <= net->xfrm.state_hmask; i++) {
- WARN_ON(!hlist_empty(net->xfrm.state_byseq + i));
- WARN_ON(!hlist_empty(net->xfrm.state_byspi + i));
- WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i));
- WARN_ON(!hlist_empty(net->xfrm.state_bydst + i));
+ WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byseq) + i));
+ WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byspi) + i));
+ WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bysrc) + i));
+ WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bydst) + i));
}
sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
- xfrm_hash_free(net->xfrm.state_byseq, sz);
- xfrm_hash_free(net->xfrm.state_byspi, sz);
- xfrm_hash_free(net->xfrm.state_bysrc, sz);
- xfrm_hash_free(net->xfrm.state_bydst, sz);
+ xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byseq), sz);
+ xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byspi), sz);
+ xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bysrc), sz);
+ xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bydst), sz);
free_percpu(net->xfrm.state_cache_input);
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 403b5ecac2c5..1656b487f833 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -35,6 +35,15 @@
#endif
#include <linux/unaligned.h>
+static struct sock *xfrm_net_nlsk(const struct net *net, const struct sk_buff *skb)
+{
+ /* get the source of this request, see netlink_unicast_kernel */
+ const struct sock *sk = NETLINK_CB(skb).sk;
+
+ /* sk is refcounted, the netns stays alive and nlsk with it */
+ return rcu_dereference_protected(net->xfrm.nlsk, sk->sk_net_refcnt);
+}
+
static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type,
struct netlink_ext_ack *extack)
{
@@ -1727,7 +1736,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
err = build_spdinfo(r_skb, net, sportid, seq, *flags);
BUG_ON(err < 0);
- return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
+ return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid);
}
static inline unsigned int xfrm_sadinfo_msgsize(void)
@@ -1787,7 +1796,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
err = build_sadinfo(r_skb, net, sportid, seq, *flags);
BUG_ON(err < 0);
- return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
+ return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid);
}
static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1807,7 +1816,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
- err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
+ err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid);
}
xfrm_state_put(x);
out_noput:
@@ -1850,6 +1859,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
if (pcpu_num >= num_possible_cpus()) {
err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "pCPU number too big");
goto out_noput;
}
}
@@ -1897,7 +1907,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
+ err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid);
out:
xfrm_state_put(x);
@@ -2542,7 +2552,7 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT];
nlmsg_end(r_skb, r_nlh);
- return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
+ return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, portid);
}
static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2608,7 +2618,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
- err = nlmsg_unicast(net->xfrm.nlsk, resp_skb,
+ err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb,
NETLINK_CB(skb).portid);
}
} else {
@@ -2781,7 +2791,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
err = build_aevent(r_skb, x, &c);
BUG_ON(err < 0);
- err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
+ err = nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, NETLINK_CB(skb).portid);
spin_unlock_bh(&x->lock);
xfrm_state_put(x);
return err;
@@ -3001,8 +3011,10 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_SA_PCPU]) {
x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
err = -EINVAL;
- if (x->pcpu_num >= num_possible_cpus())
+ if (x->pcpu_num >= num_possible_cpus()) {
+ NL_SET_ERR_MSG(extack, "pCPU number too big");
goto free_state;
+ }
}
err = verify_newpolicy_info(&ua->policy, extack);
@@ -3483,7 +3495,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err;
}
- err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c);
+ err = netlink_dump_start(xfrm_net_nlsk(net, skb), skb, nlh, &c);
goto err;
}
@@ -3673,7 +3685,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
}
if (x->if_id)
l += nla_total_size(sizeof(x->if_id));
- if (x->pcpu_num)
+ if (x->pcpu_num != UINT_MAX)
l += nla_total_size(sizeof(x->pcpu_num));
/* Must count x->lastused as it may become non-zero behind our back. */
diff --git a/rust/kernel/regulator.rs b/rust/kernel/regulator.rs
index 4f7837c7e53a..41e730cedc81 100644
--- a/rust/kernel/regulator.rs
+++ b/rust/kernel/regulator.rs
@@ -23,7 +23,10 @@ use crate::{
prelude::*,
};
-use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull};
+use core::{
+ marker::PhantomData,
+ mem::ManuallyDrop, //
+};
mod private {
pub trait Sealed {}
@@ -229,15 +232,17 @@ pub fn devm_enable_optional(dev: &Device<Bound>, name: &CStr) -> Result {
///
/// # Invariants
///
-/// - `inner` is a non-null wrapper over a pointer to a `struct
-/// regulator` obtained from [`regulator_get()`].
+/// - `inner` is a pointer obtained from a successful call to
+/// [`regulator_get()`]. It is treated as an opaque token that may only be
+/// accessed using C API methods (e.g., it may be `NULL` if the C API returns
+/// `NULL`).
///
/// [`regulator_get()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_get
pub struct Regulator<State>
where
State: RegulatorState,
{
- inner: NonNull<bindings::regulator>,
+ inner: *mut bindings::regulator,
_phantom: PhantomData<State>,
}
@@ -249,7 +254,7 @@ impl<T: RegulatorState> Regulator<T> {
// SAFETY: Safe as per the type invariants of `Regulator`.
to_result(unsafe {
bindings::regulator_set_voltage(
- self.inner.as_ptr(),
+ self.inner,
min_voltage.as_microvolts(),
max_voltage.as_microvolts(),
)
@@ -259,7 +264,7 @@ impl<T: RegulatorState> Regulator<T> {
/// Gets the current voltage of the regulator.
pub fn get_voltage(&self) -> Result<Voltage> {
// SAFETY: Safe as per the type invariants of `Regulator`.
- let voltage = unsafe { bindings::regulator_get_voltage(self.inner.as_ptr()) };
+ let voltage = unsafe { bindings::regulator_get_voltage(self.inner) };
to_result(voltage).map(|()| Voltage::from_microvolts(voltage))
}
@@ -270,10 +275,8 @@ impl<T: RegulatorState> Regulator<T> {
// received from the C code.
from_err_ptr(unsafe { bindings::regulator_get(dev.as_raw(), name.as_char_ptr()) })?;
- // SAFETY: We can safely trust `inner` to be a pointer to a valid
- // regulator if `ERR_PTR` was not returned.
- let inner = unsafe { NonNull::new_unchecked(inner) };
-
+ // INVARIANT: `inner` is a pointer obtained from `regulator_get()`, and
+ // the call was successful.
Ok(Self {
inner,
_phantom: PhantomData,
@@ -282,12 +285,12 @@ impl<T: RegulatorState> Regulator<T> {
fn enable_internal(&self) -> Result {
// SAFETY: Safe as per the type invariants of `Regulator`.
- to_result(unsafe { bindings::regulator_enable(self.inner.as_ptr()) })
+ to_result(unsafe { bindings::regulator_enable(self.inner) })
}
fn disable_internal(&self) -> Result {
// SAFETY: Safe as per the type invariants of `Regulator`.
- to_result(unsafe { bindings::regulator_disable(self.inner.as_ptr()) })
+ to_result(unsafe { bindings::regulator_disable(self.inner) })
}
}
@@ -349,7 +352,7 @@ impl<T: IsEnabled> Regulator<T> {
/// Checks if the regulator is enabled.
pub fn is_enabled(&self) -> bool {
// SAFETY: Safe as per the type invariants of `Regulator`.
- unsafe { bindings::regulator_is_enabled(self.inner.as_ptr()) != 0 }
+ unsafe { bindings::regulator_is_enabled(self.inner) != 0 }
}
}
@@ -359,11 +362,11 @@ impl<T: RegulatorState> Drop for Regulator<T> {
// SAFETY: By the type invariants, we know that `self` owns a
// reference on the enabled refcount, so it is safe to relinquish it
// now.
- unsafe { bindings::regulator_disable(self.inner.as_ptr()) };
+ unsafe { bindings::regulator_disable(self.inner) };
}
// SAFETY: By the type invariants, we know that `self` owns a reference,
// so it is safe to relinquish it now.
- unsafe { bindings::regulator_put(self.inner.as_ptr()) };
+ unsafe { bindings::regulator_put(self.inner) };
}
}
diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c
index e7af02f98208..9f21088c0855 100644
--- a/samples/landlock/sandboxer.c
+++ b/samples/landlock/sandboxer.c
@@ -299,7 +299,7 @@ out_unset:
/* clang-format on */
-#define LANDLOCK_ABI_LAST 7
+#define LANDLOCK_ABI_LAST 8
#define XSTR(s) #s
#define STR(s) XSTR(s)
@@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp)
/* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */
supported_restrict_flags &=
~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
-
+ __attribute__((fallthrough));
+ case 7:
/* Must be printed for any ABI < LANDLOCK_ABI_LAST. */
fprintf(stderr,
"Hint: You should update the running kernel "
diff --git a/scripts/coccinelle/api/kmalloc_objs.cocci b/scripts/coccinelle/api/kmalloc_objs.cocci
index db12b7be7247..e9a415b7b6f4 100644
--- a/scripts/coccinelle/api/kmalloc_objs.cocci
+++ b/scripts/coccinelle/api/kmalloc_objs.cocci
@@ -122,3 +122,14 @@ fresh identifier ALLOC_OBJS = script:python(ALLOC_ARRAY) { alloc_array(ALLOC_ARR
- ALLOC(struct_size_t(TYPE, FLEX, COUNT), GFP)
+ ALLOC_FLEX(TYPE, FLEX, COUNT, GFP)
)
+
+@drop_gfp_kernel depends on patch && !(file in "tools") && !(file in "samples")@
+identifier ALLOC = {kmalloc_obj,kmalloc_objs,kmalloc_flex,
+ kzalloc_obj,kzalloc_objs,kzalloc_flex,
+ kvmalloc_obj,kvmalloc_objs,kvmalloc_flex,
+ kvzalloc_obj,kvzalloc_objs,kvzalloc_flex};
+@@
+
+ ALLOC(...
+- , GFP_KERNEL
+ )
diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh
index 735e1de450c6..f08e0863b712 100755
--- a/scripts/kconfig/merge_config.sh
+++ b/scripts/kconfig/merge_config.sh
@@ -151,6 +151,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
if ! "$AWK" -v prefix="$CONFIG_PREFIX" \
-v warnoverride="$WARNOVERRIDE" \
-v strict="$STRICT" \
+ -v outfile="$TMP_FILE.new" \
-v builtin="$BUILTIN" \
-v warnredun="$WARNREDUN" '
BEGIN {
@@ -195,7 +196,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
# First pass: read merge file, store all lines and index
FILENAME == ARGV[1] {
- mergefile = FILENAME
+ mergefile = FILENAME
merge_lines[FNR] = $0
merge_total = FNR
cfg = get_cfg($0)
@@ -212,17 +213,17 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
# Not a config or not in merge file - keep it
if (cfg == "" || !(cfg in merge_cfg)) {
- print $0 >> ARGV[3]
+ print $0 >> outfile
next
}
- prev_val = $0
+ prev_val = $0
new_val = merge_cfg[cfg]
# BUILTIN: do not demote y to m
if (builtin == "true" && new_val ~ /=m$/ && prev_val ~ /=y$/) {
warn_builtin(cfg, prev_val, new_val)
- print $0 >> ARGV[3]
+ print $0 >> outfile
skip_merge[merge_cfg_line[cfg]] = 1
next
}
@@ -235,7 +236,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
# "=n" is the same as "is not set"
if (prev_val ~ /=n$/ && new_val ~ / is not set$/) {
- print $0 >> ARGV[3]
+ print $0 >> outfile
next
}
@@ -246,25 +247,20 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
}
}
- # output file, skip all lines
- FILENAME == ARGV[3] {
- nextfile
- }
-
END {
# Newline in case base file lacks trailing newline
- print "" >> ARGV[3]
+ print "" >> outfile
# Append merge file, skipping lines marked for builtin preservation
for (i = 1; i <= merge_total; i++) {
if (!(i in skip_merge)) {
- print merge_lines[i] >> ARGV[3]
+ print merge_lines[i] >> outfile
}
}
if (strict_violated) {
exit 1
}
}' \
- "$ORIG_MERGE_FILE" "$TMP_FILE" "$TMP_FILE.new"; then
+ "$ORIG_MERGE_FILE" "$TMP_FILE"; then
# awk exited non-zero, strict mode was violated
STRICT_MODE_VIOLATED=true
fi
@@ -381,7 +377,7 @@ END {
STRICT_MODE_VIOLATED=true
fi
-if [ "$STRICT" == "true" ] && [ "$STRICT_MODE_VIOLATED" == "true" ]; then
+if [ "$STRICT" = "true" ] && [ "$STRICT_MODE_VIOLATED" = "true" ]; then
echo "Requested and effective config differ"
exit 1
fi
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index f5b78d4766cd..f0d83f43afa1 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void)
* allocate with GFP_KERNEL_ACCOUNT because it is independent from the
* caller.
*/
- details =
- kzalloc_flex(*details, exe_path, path_size);
+ details = kzalloc_flex(*details, exe_path, path_size);
if (!details)
return ERR_PTR(-ENOMEM);
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 319873586385..73018dc8d6c7 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
{
struct landlock_ruleset *new_ruleset;
- new_ruleset =
- kzalloc_flex(*new_ruleset, access_masks, num_layers,
- GFP_KERNEL_ACCOUNT);
+ new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers,
+ GFP_KERNEL_ACCOUNT);
if (!new_ruleset)
return ERR_PTR(-ENOMEM);
refcount_set(&new_ruleset->usage, 1);
@@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent,
if (IS_ERR(new_dom))
return new_dom;
- new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy,
- GFP_KERNEL_ACCOUNT);
+ new_dom->hierarchy =
+ kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
if (!new_dom->hierarchy)
return ERR_PTR(-ENOMEM);
diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c
index de01aa899751..4d4427ba8d93 100644
--- a/security/landlock/tsync.c
+++ b/security/landlock/tsync.c
@@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s,
return ctx;
}
+/**
+ * tsync_works_trim - Put the last tsync_work element
+ *
+ * @s: TSYNC works to trim.
+ *
+ * Put the last task and decrement the size of @s.
+ *
+ * This helper does not cancel a running task, but just reset the last element
+ * to zero.
+ */
+static void tsync_works_trim(struct tsync_works *s)
+{
+ struct tsync_work *ctx;
+
+ if (WARN_ON_ONCE(s->size <= 0))
+ return;
+
+ ctx = s->works[s->size - 1];
+
+ /*
+ * For consistency, remove the task from ctx so that it does not look like
+ * we handed it a task_work.
+ */
+ put_task_struct(ctx->task);
+ *ctx = (typeof(*ctx)){};
+
+ /*
+ * Cancel the tsync_works_provide() change to recycle the reserved memory
+ * for the next thread, if any. This also ensures that cancel_tsync_works()
+ * and tsync_works_release() do not see any NULL task pointers.
+ */
+ s->size--;
+}
+
/*
* tsync_works_grow_by - preallocates space for n more contexts in s
*
@@ -256,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
* tsync_works_contains - checks for presence of task in s
*/
static bool tsync_works_contains_task(const struct tsync_works *s,
- struct task_struct *task)
+ const struct task_struct *task)
{
size_t i;
for (i = 0; i < s->size; i++)
if (s->works[i]->task == task)
return true;
+
return false;
}
@@ -276,7 +311,7 @@ static void tsync_works_release(struct tsync_works *s)
size_t i;
for (i = 0; i < s->size; i++) {
- if (!s->works[i]->task)
+ if (WARN_ON_ONCE(!s->works[i]->task))
continue;
put_task_struct(s->works[i]->task);
@@ -284,6 +319,7 @@ static void tsync_works_release(struct tsync_works *s)
for (i = 0; i < s->capacity; i++)
kfree(s->works[i]);
+
kfree(s->works);
s->works = NULL;
s->size = 0;
@@ -295,7 +331,7 @@ static void tsync_works_release(struct tsync_works *s)
*/
static size_t count_additional_threads(const struct tsync_works *works)
{
- struct task_struct *thread, *caller;
+ const struct task_struct *caller, *thread;
size_t n = 0;
caller = current;
@@ -334,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works,
struct tsync_shared_context *shared_ctx)
{
int err;
- struct task_struct *thread, *caller;
+ const struct task_struct *caller;
+ struct task_struct *thread;
struct tsync_work *ctx;
bool found_more_threads = false;
@@ -379,16 +416,14 @@ static bool schedule_task_work(struct tsync_works *works,
init_task_work(&ctx->work, restrict_one_thread_callback);
err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
- if (err) {
+ if (unlikely(err)) {
/*
* task_work_add() only fails if the task is about to exit. We
* checked that earlier, but it can happen as a race. Resume
* without setting an error, as the task is probably gone in the
- * next loop iteration. For consistency, remove the task from ctx
- * so that it does not look like we handed it a task_work.
+ * next loop iteration.
*/
- put_task_struct(ctx->task);
- ctx->task = NULL;
+ tsync_works_trim(works);
atomic_dec(&shared_ctx->num_preparing);
atomic_dec(&shared_ctx->num_unfinished);
@@ -406,12 +441,15 @@ static bool schedule_task_work(struct tsync_works *works,
* shared_ctx->num_preparing and shared_ctx->num_unfished and mark the two
* completions if needed, as if the task was never scheduled.
*/
-static void cancel_tsync_works(struct tsync_works *works,
+static void cancel_tsync_works(const struct tsync_works *works,
struct tsync_shared_context *shared_ctx)
{
- int i;
+ size_t i;
for (i = 0; i < works->size; i++) {
+ if (WARN_ON_ONCE(!works->works[i]->task))
+ continue;
+
if (!task_work_cancel(works->works[i]->task,
&works->works[i]->work))
continue;
@@ -448,6 +486,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
shared_ctx.set_no_new_privs = task_no_new_privs(current);
/*
+ * Serialize concurrent TSYNC operations to prevent deadlocks when
+ * multiple threads call landlock_restrict_self() simultaneously.
+ * If the lock is already held, we gracefully yield by restarting the
+ * syscall. This allows the current thread to process pending
+ * task_works before retrying.
+ */
+ if (!down_write_trylock(&current->signal->exec_update_lock))
+ return restart_syscall();
+
+ /*
* We schedule a pseudo-signal task_work for each of the calling task's
* sibling threads. In the task work, each thread:
*
@@ -527,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
-ERESTARTNOINTR);
/*
- * Cancel task works for tasks that did not start running yet,
- * and decrement all_prepared and num_unfinished accordingly.
+ * Opportunistic improvement: try to cancel task
+ * works for tasks that did not start running
+ * yet. We do not have a guarantee that it
+ * cancels any of the enqueued task works
+ * because task_work_run() might already have
+ * dequeued them.
*/
cancel_tsync_works(&works, &shared_ctx);
/*
- * The remaining task works have started running, so waiting for
- * their completion will finish.
+ * Break the loop with error. The cleanup code
+ * after the loop unblocks the remaining
+ * task_works.
*/
- wait_for_completion(&shared_ctx.all_prepared);
+ break;
}
}
} while (found_more_threads &&
!atomic_read(&shared_ctx.preparation_error));
/*
- * We now have all sibling threads blocking and in "prepared" state in the
- * task work. Ask all threads to commit.
+ * We now have either (a) all sibling threads blocking and in "prepared"
+ * state in the task work, or (b) the preparation error is set. Ask all
+ * threads to commit (or abort).
*/
complete_all(&shared_ctx.ready_to_commit);
@@ -556,6 +610,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
wait_for_completion(&shared_ctx.all_finished);
tsync_works_release(&works);
-
+ up_write(&current->signal->exec_update_lock);
return atomic_read(&shared_ctx.preparation_error);
}
diff --git a/security/security.c b/security/security.c
index 67af9228c4e9..a26c1474e2e4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -61,6 +61,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = {
[LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
[LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
[LOCKDOWN_RTAS_ERROR_INJECTION] = "RTAS error injection",
+ [LOCKDOWN_XEN_USER_ACTIONS] = "Xen guest user action",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index e97721f80f65..8e70da850fac 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -1164,7 +1164,7 @@ static void process_rx_packets(struct fw_iso_context *context, u32 tstamp, size_
struct pkt_desc *desc = s->packet_descs_cursor;
unsigned int pkt_header_length;
unsigned int packets;
- u32 curr_cycle_time;
+ u32 curr_cycle_time = 0;
bool need_hw_irq;
int i;
diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c
index ab4b22fcb72e..6db23ce8a5fe 100644
--- a/sound/hda/codecs/realtek/alc269.c
+++ b/sound/hda/codecs/realtek/alc269.c
@@ -1017,12 +1017,30 @@ static int alc269_resume(struct hda_codec *codec)
return 0;
}
-#define STARLABS_STARFIGHTER_SHUTUP_DELAY_MS 30
+#define ALC233_STARFIGHTER_SPK_PIN 0x1b
+#define ALC233_STARFIGHTER_GPIO2 0x04
-static void starlabs_starfighter_shutup(struct hda_codec *codec)
+static void alc233_starfighter_update_amp(struct hda_codec *codec, bool on)
{
- if (snd_hda_gen_shutup_speakers(codec))
- msleep(STARLABS_STARFIGHTER_SHUTUP_DELAY_MS);
+ snd_hda_codec_write(codec, ALC233_STARFIGHTER_SPK_PIN, 0,
+ AC_VERB_SET_EAPD_BTLENABLE,
+ on ? AC_EAPDBTL_EAPD : 0);
+ alc_update_gpio_data(codec, ALC233_STARFIGHTER_GPIO2, on);
+}
+
+static void alc233_starfighter_pcm_hook(struct hda_pcm_stream *hinfo,
+ struct hda_codec *codec,
+ struct snd_pcm_substream *substream,
+ int action)
+{
+ switch (action) {
+ case HDA_GEN_PCM_ACT_PREPARE:
+ alc233_starfighter_update_amp(codec, true);
+ break;
+ case HDA_GEN_PCM_ACT_CLEANUP:
+ alc233_starfighter_update_amp(codec, false);
+ break;
+ }
}
static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec,
@@ -1031,8 +1049,16 @@ static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec,
{
struct alc_spec *spec = codec->spec;
- if (action == HDA_FIXUP_ACT_PRE_PROBE)
- spec->shutup = starlabs_starfighter_shutup;
+ switch (action) {
+ case HDA_FIXUP_ACT_PRE_PROBE:
+ spec->gpio_mask |= ALC233_STARFIGHTER_GPIO2;
+ spec->gpio_dir |= ALC233_STARFIGHTER_GPIO2;
+ spec->gpio_data &= ~ALC233_STARFIGHTER_GPIO2;
+ break;
+ case HDA_FIXUP_ACT_PROBE:
+ spec->gen.pcm_playback_hook = alc233_starfighter_pcm_hook;
+ break;
+ }
}
static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec,
@@ -3699,22 +3725,42 @@ static void alc245_tas2781_spi_hp_fixup_muteled(struct hda_codec *codec,
alc_fixup_hp_gpio_led(codec, action, 0x04, 0x0);
alc285_fixup_hp_coef_micmute_led(codec, fix, action);
}
+
+static void alc245_hp_spk_mute_led_update(void *private_data, int enabled)
+{
+ struct hda_codec *codec = private_data;
+ unsigned int val;
+
+ val = enabled ? 0x08 : 0x04; /* 0x08 led on, 0x04 led off */
+ alc_update_coef_idx(codec, 0x0b, 0x0c, val);
+}
+
/* JD2: mute led GPIO3: micmute led */
static void alc245_tas2781_i2c_hp_fixup_muteled(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
struct alc_spec *spec = codec->spec;
+ hda_nid_t hp_pin = alc_get_hp_pin(spec);
static const hda_nid_t conn[] = { 0x02 };
switch (action) {
case HDA_FIXUP_ACT_PRE_PROBE:
+ if (!hp_pin) {
+ spec->gen.vmaster_mute.hook = alc245_hp_spk_mute_led_update;
+ spec->gen.vmaster_mute_led = 1;
+ }
spec->gen.auto_mute_via_amp = 1;
snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn);
break;
+ case HDA_FIXUP_ACT_INIT:
+ if (!hp_pin)
+ alc245_hp_spk_mute_led_update(codec, !spec->gen.master_mute);
+ break;
}
tas2781_fixup_txnw_i2c(codec, fix, action);
- alc245_fixup_hp_mute_led_coefbit(codec, fix, action);
+ if (hp_pin)
+ alc245_fixup_hp_mute_led_coefbit(codec, fix, action);
alc285_fixup_hp_coef_micmute_led(codec, fix, action);
}
/*
@@ -4076,6 +4122,8 @@ enum {
ALC233_FIXUP_LENOVO_GPIO2_MIC_HOTKEY,
ALC245_FIXUP_BASS_HP_DAC,
ALC245_FIXUP_ACER_MICMUTE_LED,
+ ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED,
+ ALC236_FIXUP_HP_DMIC,
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -6605,6 +6653,19 @@ static const struct hda_fixup alc269_fixups[] = {
.v.func = alc285_fixup_hp_coef_micmute_led,
.chained = true,
.chain_id = ALC2XX_FIXUP_HEADSET_MIC,
+ },
+ [ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc245_fixup_hp_mute_led_coefbit,
+ .chained = true,
+ .chain_id = ALC287_FIXUP_CS35L41_I2C_2,
+ },
+ [ALC236_FIXUP_HP_DMIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x12, 0x90a60160 }, /* use as internal mic */
+ { }
+ },
}
};
@@ -6659,6 +6720,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1597, "Acer Nitro 5 AN517-55", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED),
SND_PCI_QUIRK(0x1025, 0x171e, "Acer Nitro ANV15-51", ALC245_FIXUP_ACER_MICMUTE_LED),
+ SND_PCI_QUIRK(0x1025, 0x173a, "Acer Swift SFG14-73", ALC245_FIXUP_ACER_MICMUTE_LED),
SND_PCI_QUIRK(0x1025, 0x1826, "Acer Helios ZPC", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1025, 0x182c, "Acer Helios ZPD", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1025, 0x1844, "Acer Helios ZPS", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2),
@@ -6854,6 +6916,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8735, "HP ProBook 435 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8756, "HP ENVY Laptop 13-ba0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS),
SND_PCI_QUIRK(0x103c, 0x8760, "HP EliteBook 8{4,5}5 G7", ALC285_FIXUP_HP_BEEP_MICMUTE_LED),
SND_PCI_QUIRK(0x103c, 0x876e, "HP ENVY x360 Convertible 13-ay0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS),
SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED),
@@ -6867,6 +6930,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x87cb, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87cc, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x87df, "HP ProBook 430 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
@@ -6954,6 +7018,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8a30, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8a31, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8a34, "HP Pavilion x360 2-in-1 Laptop 14-ek0xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
+ SND_PCI_QUIRK(0x103c, 0x8a3d, "HP Victus 15-fb0xxx (MB 8A3D)", ALC245_FIXUP_HP_MUTE_LED_V2_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8a4f, "HP Victus 15-fa0xxx (MB 8A4F)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8a6e, "HP EDNA 360", ALC287_FIXUP_CS35L41_I2C_4),
SND_PCI_QUIRK(0x103c, 0x8a74, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
@@ -7097,7 +7162,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8da1, "HP 16 Clipper OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8dc9, "HP Laptop 15-fc0xxx", ALC236_FIXUP_HP_DMIC),
SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS),
+ SND_PCI_QUIRK(0x103c, 0x8dd7, "HP Laptop 15-fd0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2),
SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2),
SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED),
@@ -7128,7 +7195,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8e60, "HP OmniBook 7 Laptop 16-bh0xxx", ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e8a, "HP NexusX", ALC245_FIXUP_HP_TAS2781_I2C_MUTE_LED),
@@ -7177,6 +7244,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x1194, "ASUS UM3406KA", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+ HDA_CODEC_QUIRK(0x1043, 0x1204, "ASUS Strix G16 G615JMR", ALC287_FIXUP_TXNW2781_I2C_ASUS),
SND_PCI_QUIRK(0x1043, 0x1204, "ASUS Strix G615JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x1043, 0x1214, "ASUS Strix G615LH_LM_LP", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
@@ -7206,10 +7274,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x14e3, "ASUS G513PI/PU/PV", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x14f2, "ASUS VivoBook X515JA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1503, "ASUS G733PY/PZ/PZV/PYV", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1514, "ASUS ROG Flow Z13 GZ302EAC", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA/XJ/XQ/XU/XV/XI", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301VV/VQ/VU/VJ/VA/VC/VE/VVC/VQC/VUC/VJC/VEC/VCC", ALC285_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1584, "ASUS UM3406GA ", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1602, "ASUS ROG Strix SCAR 15", ALC285_FIXUP_ASUS_G533Z_PINS),
SND_PCI_QUIRK(0x1043, 0x1652, "ASUS ROG Zephyrus Do 15 SE", ALC289_FIXUP_ASUS_ZEPHYRUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC),
@@ -7349,6 +7419,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x144d, 0xc188, "Samsung Galaxy Book Flex (NT950QCT-A38A)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Book Flex (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
+ SND_PCI_QUIRK(0x144d, 0xc1ac, "Samsung Galaxy Book2 Pro 360 (NP950QED)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS),
SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc1a4, "Samsung Galaxy Book Pro 360 (NT935QBD)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc1a6, "Samsung Galaxy Book Pro 360 (NP930QBD)", ALC298_FIXUP_SAMSUNG_AMP),
@@ -7544,6 +7615,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3843, "Lenovo Yoga 9i / Yoga Book 9i", ALC287_FIXUP_LENOVO_YOGA_BOOK_9I),
+ /* Yoga Pro 7 14IMH9 shares PCI SSID 17aa:3847 with Legion 7 16ACHG6;
+ * use codec SSID to distinguish them
+ */
+ HDA_CODEC_QUIRK(0x17aa, 0x38cf, "Lenovo Yoga Pro 7 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x3847, "Legion 7 16ACHG6", ALC287_FIXUP_LEGION_16ACHG6),
SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3852, "Lenovo Yoga 7 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
@@ -7575,6 +7650,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
HDA_CODEC_QUIRK(0x17aa, 0x391c, "Lenovo Yoga 7 2-in-1 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
+ HDA_CODEC_QUIRK(0x17aa, 0x391d, "Lenovo Yoga 7 2-in-1 16AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b7, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2),
@@ -7605,6 +7681,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
SND_PCI_QUIRK(0x17aa, 0x390d, "Lenovo Yoga Pro 7 14ASP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x3913, "Lenovo 145", ALC236_FIXUP_LENOVO_INV_DMIC),
+ SND_PCI_QUIRK(0x17aa, 0x391a, "Lenovo Yoga Slim 7 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x391f, "Yoga S990-16 pro Quad YC Quad", ALC287_FIXUP_TXNW2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3920, "Yoga S990-16 pro Quad VECO Quad", ALC287_FIXUP_TXNW2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3929, "Thinkbook 13x Gen 5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
@@ -7698,6 +7775,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0xf111, 0x000b, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0xf111, 0x000c, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0xf111, 0x000f, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
#if 0
/* Below is a quirk table taken from the old code.
diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c
index 3f434994c18d..8a7bd49e411f 100644
--- a/sound/hda/controllers/intel.c
+++ b/sound/hda/controllers/intel.c
@@ -2077,7 +2077,6 @@ static const struct pci_device_id driver_denylist[] = {
{ PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */
{ PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */
{ PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */
- { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */
{}
};
@@ -2086,6 +2085,11 @@ static struct pci_device_id driver_denylist_ideapad_z570[] = {
{}
};
+static struct pci_device_id driver_denylist_msi_x870e[] = {
+ { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */
+ {}
+};
+
/* DMI-based denylist, to be used when:
* - PCI subsystem IDs are zero, impossible to distinguish from valid sound cards.
* - Different modifications of the same laptop use different GPU models.
@@ -2099,6 +2103,14 @@ static const struct dmi_system_id driver_denylist_dmi[] = {
},
.driver_data = &driver_denylist_ideapad_z570,
},
+ {
+ /* PCI device matching alone incorrectly matches some laptops */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."),
+ DMI_MATCH(DMI_BOARD_NAME, "MAG X870E TOMAHAWK WIFI (MS-7E59)"),
+ },
+ .driver_data = &driver_denylist_msi_x870e,
+ },
{}
};
diff --git a/sound/pci/asihpi/hpimsgx.c b/sound/pci/asihpi/hpimsgx.c
index b68e6bfbbfba..ed1c7b774436 100644
--- a/sound/pci/asihpi/hpimsgx.c
+++ b/sound/pci/asihpi/hpimsgx.c
@@ -581,8 +581,10 @@ static u16 adapter_prepare(u16 adapter)
HPI_ADAPTER_OPEN);
hm.adapter_index = adapter;
hw_entry_point(&hm, &hr);
- memcpy(&rESP_HPI_ADAPTER_OPEN[adapter], &hr,
- sizeof(rESP_HPI_ADAPTER_OPEN[0]));
+ memcpy(&rESP_HPI_ADAPTER_OPEN[adapter].h, &hr,
+ sizeof(rESP_HPI_ADAPTER_OPEN[adapter].h));
+ memcpy(&rESP_HPI_ADAPTER_OPEN[adapter].a, &hr.u.ax.info,
+ sizeof(rESP_HPI_ADAPTER_OPEN[adapter].a));
if (hr.error)
return hr.error;
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index f122e396bc55..da2667cb2489 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -1427,10 +1427,14 @@ static int atc_get_resources(struct ct_atc *atc)
daio_mgr = (struct daio_mgr *)atc->rsc_mgrs[DAIO];
da_desc.msr = atc->msr;
for (i = 0; i < NUM_DAIOTYP; i++) {
- if (((i == MIC) && !cap.dedicated_mic) || ((i == RCA) && !cap.dedicated_rca))
+ if (((i == MIC) && !cap.dedicated_mic) ||
+ ((i == RCA) && !cap.dedicated_rca) ||
+ i == SPDIFI1)
continue;
- da_desc.type = (atc->model != CTSB073X) ? i :
- ((i == SPDIFIO) ? SPDIFI1 : i);
+ if (atc->model == CTSB073X && i == SPDIFIO)
+ da_desc.type = SPDIFI1;
+ else
+ da_desc.type = i;
da_desc.output = (i < LINEIM) || (i == RCA);
err = daio_mgr->get_daio(daio_mgr, &da_desc,
(struct daio **)&atc->daios[i]);
diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c
index b8bde27f3a1d..4dbb1dd7af32 100644
--- a/sound/pci/ctxfi/ctdaio.c
+++ b/sound/pci/ctxfi/ctdaio.c
@@ -99,7 +99,7 @@ static const struct rsc_ops daio_in_rsc_ops_20k2 = {
.output_slot = daio_index,
};
-static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw)
+static int daio_device_index(enum DAIOTYP type, struct hw *hw)
{
switch (hw->chip_type) {
case ATC20K1:
@@ -112,12 +112,15 @@ static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw)
case LINEO3: return 5;
case LINEO4: return 6;
case LINEIM: return 7;
- default: return -EINVAL;
+ default:
+ pr_err("ctxfi: Invalid type %d for hw20k1\n", type);
+ return -EINVAL;
}
case ATC20K2:
switch (type) {
case SPDIFOO: return 0;
case SPDIFIO: return 0;
+ case SPDIFI1: return 1;
case LINEO1: return 4;
case LINEO2: return 7;
case LINEO3: return 5;
@@ -125,9 +128,12 @@ static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw)
case LINEIM: return 4;
case MIC: return 5;
case RCA: return 3;
- default: return -EINVAL;
+ default:
+ pr_err("ctxfi: Invalid type %d for hw20k2\n", type);
+ return -EINVAL;
}
default:
+ pr_err("ctxfi: Invalid chip type %d\n", hw->chip_type);
return -EINVAL;
}
}
@@ -148,8 +154,11 @@ static int dao_spdif_set_spos(struct dao *dao, unsigned int spos)
static int dao_commit_write(struct dao *dao)
{
- dao->hw->dao_commit_write(dao->hw,
- daio_device_index(dao->daio.type, dao->hw), dao->ctrl_blk);
+ int idx = daio_device_index(dao->daio.type, dao->hw);
+
+ if (idx < 0)
+ return idx;
+ dao->hw->dao_commit_write(dao->hw, idx, dao->ctrl_blk);
return 0;
}
@@ -287,8 +296,11 @@ static int dai_set_enb_srt(struct dai *dai, unsigned int enb)
static int dai_commit_write(struct dai *dai)
{
- dai->hw->dai_commit_write(dai->hw,
- daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk);
+ int idx = daio_device_index(dai->daio.type, dai->hw);
+
+ if (idx < 0)
+ return idx;
+ dai->hw->dai_commit_write(dai->hw, idx, dai->ctrl_blk);
return 0;
}
@@ -367,7 +379,7 @@ static int dao_rsc_init(struct dao *dao,
{
struct hw *hw = mgr->mgr.hw;
unsigned int conf;
- int err;
+ int idx, err;
err = daio_rsc_init(&dao->daio, desc, mgr->mgr.hw);
if (err)
@@ -386,15 +398,18 @@ static int dao_rsc_init(struct dao *dao,
if (err)
goto error2;
- hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk,
- daio_device_index(dao->daio.type, hw));
+ idx = daio_device_index(dao->daio.type, hw);
+ if (idx < 0) {
+ err = idx;
+ goto error2;
+ }
+
+ hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, idx);
hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
conf = (desc->msr & 0x7) | (desc->passthru << 3);
- hw->daio_mgr_dao_init(hw, mgr->mgr.ctrl_blk,
- daio_device_index(dao->daio.type, hw), conf);
- hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk,
- daio_device_index(dao->daio.type, hw));
+ hw->daio_mgr_dao_init(hw, mgr->mgr.ctrl_blk, idx, conf);
+ hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, idx);
hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
return 0;
@@ -443,7 +458,7 @@ static int dai_rsc_init(struct dai *dai,
const struct daio_desc *desc,
struct daio_mgr *mgr)
{
- int err;
+ int idx, err;
struct hw *hw = mgr->mgr.hw;
unsigned int rsr, msr;
@@ -457,6 +472,12 @@ static int dai_rsc_init(struct dai *dai,
if (err)
goto error1;
+ idx = daio_device_index(dai->daio.type, dai->hw);
+ if (idx < 0) {
+ err = idx;
+ goto error1;
+ }
+
for (rsr = 0, msr = desc->msr; msr > 1; msr >>= 1)
rsr++;
@@ -465,8 +486,7 @@ static int dai_rsc_init(struct dai *dai,
/* default to disabling control of a SRC */
hw->dai_srt_set_ec(dai->ctrl_blk, 0);
hw->dai_srt_set_et(dai->ctrl_blk, 0); /* default to disabling SRT */
- hw->dai_commit_write(hw,
- daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk);
+ hw->dai_commit_write(hw, idx, dai->ctrl_blk);
return 0;
@@ -581,28 +601,28 @@ static int put_daio_rsc(struct daio_mgr *mgr, struct daio *daio)
static int daio_mgr_enb_daio(struct daio_mgr *mgr, struct daio *daio)
{
struct hw *hw = mgr->mgr.hw;
-
- if (daio->output) {
- hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk,
- daio_device_index(daio->type, hw));
- } else {
- hw->daio_mgr_enb_dai(mgr->mgr.ctrl_blk,
- daio_device_index(daio->type, hw));
- }
+ int idx = daio_device_index(daio->type, hw);
+
+ if (idx < 0)
+ return idx;
+ if (daio->output)
+ hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, idx);
+ else
+ hw->daio_mgr_enb_dai(mgr->mgr.ctrl_blk, idx);
return 0;
}
static int daio_mgr_dsb_daio(struct daio_mgr *mgr, struct daio *daio)
{
struct hw *hw = mgr->mgr.hw;
-
- if (daio->output) {
- hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk,
- daio_device_index(daio->type, hw));
- } else {
- hw->daio_mgr_dsb_dai(mgr->mgr.ctrl_blk,
- daio_device_index(daio->type, hw));
- }
+ int idx = daio_device_index(daio->type, hw);
+
+ if (idx < 0)
+ return idx;
+ if (daio->output)
+ hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, idx);
+ else
+ hw->daio_mgr_dsb_dai(mgr->mgr.ctrl_blk, idx);
return 0;
}
diff --git a/sound/soc/amd/acp-config.c b/sound/soc/amd/acp-config.c
index 365209ea53f3..1604ed679224 100644
--- a/sound/soc/amd/acp-config.c
+++ b/sound/soc/amd/acp-config.c
@@ -23,6 +23,16 @@
static int acp_quirk_data;
+static const struct dmi_system_id acp70_acpi_flag_override_table[] = {
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"),
+ },
+ },
+ {}
+};
+
static const struct config_entry config_table[] = {
{
.flags = FLAG_AMD_SOF,
@@ -186,8 +196,11 @@ int snd_amd_acp_find_config(struct pci_dev *pci)
*/
if (!pci->revision)
return 0;
- else if (pci->revision >= ACP_7_0_REV)
+ else if (pci->revision >= ACP_7_0_REV) {
+ if (dmi_check_system(acp70_acpi_flag_override_table))
+ return 0;
return snd_amd_acp_acpi_find_config(pci);
+ }
for (i = 0; i < ARRAY_SIZE(config_table); i++, table++) {
if (table->device != device)
diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c
index c30ccf23005a..6388cd7cb28e 100644
--- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c
+++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c
@@ -111,6 +111,14 @@ static const struct dmi_system_id soc_sdw_quirk_table[] = {
},
.driver_data = (void *)(ASOC_SDW_CODEC_SPKR),
},
+ {
+ .callback = soc_sdw_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"),
+ },
+ .driver_data = (void *)(ASOC_SDW_ACP_DMIC),
+ },
{}
};
diff --git a/sound/soc/amd/acp/amd-acp70-acpi-match.c b/sound/soc/amd/acp/amd-acp70-acpi-match.c
index 7a567ba02292..1ae43df5da6c 100644
--- a/sound/soc/amd/acp/amd-acp70-acpi-match.c
+++ b/sound/soc/amd/acp/amd-acp70-acpi-match.c
@@ -69,6 +69,28 @@ static const struct snd_soc_acpi_endpoint jack_amp_g1_dmic_endpoints[] = {
},
};
+static const struct snd_soc_acpi_endpoint jack_dmic_endpoints[] = {
+ /* Jack Endpoint */
+ {
+ .num = 0,
+ .aggregated = 0,
+ .group_position = 0,
+ .group_id = 0,
+ },
+ /* DMIC Endpoint */
+ {
+ /*
+ * rt721 endpoint #2 maps to AIF3 (internal DMIC capture).
+ * Endpoint #1 is AIF2 amp path and is handled by external amps
+ * on this platform.
+ */
+ .num = 2,
+ .aggregated = 0,
+ .group_position = 0,
+ .group_id = 0,
+ },
+};
+
static const struct snd_soc_acpi_adr_device rt712_vb_1_group1_adr[] = {
{
.adr = 0x000130025D071201ull,
@@ -563,6 +585,40 @@ static const struct snd_soc_acpi_link_adr acp70_rt1320_l0_rt722_l1[] = {
{}
};
+static const struct snd_soc_acpi_adr_device rt721_l1u0_tas2783x2_l1u8b_adr[] = {
+ {
+ .adr = 0x000130025D072101ull,
+ /*
+ * On this platform speakers are provided by two TAS2783 amps.
+ * Keep rt721 as UAJ + DMIC only.
+ */
+ .num_endpoints = ARRAY_SIZE(jack_dmic_endpoints),
+ .endpoints = jack_dmic_endpoints,
+ .name_prefix = "rt721",
+ },
+ {
+ .adr = 0x0001380102000001ull,
+ .num_endpoints = 1,
+ .endpoints = &spk_l_endpoint,
+ .name_prefix = "tas2783-1",
+ },
+ {
+ .adr = 0x00013B0102000001ull,
+ .num_endpoints = 1,
+ .endpoints = &spk_r_endpoint,
+ .name_prefix = "tas2783-2",
+ },
+};
+
+static const struct snd_soc_acpi_link_adr acp70_rt721_l1u0_tas2783x2_l1u8b[] = {
+ {
+ .mask = BIT(1),
+ .num_adr = ARRAY_SIZE(rt721_l1u0_tas2783x2_l1u8b_adr),
+ .adr_d = rt721_l1u0_tas2783x2_l1u8b_adr,
+ },
+ {}
+};
+
struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = {
{
.link_mask = BIT(0) | BIT(1),
@@ -650,6 +706,11 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = {
.machine_check = snd_soc_acpi_amd_sdca_is_device_rt712_vb,
.drv_name = "amd_sdw",
},
+ {
+ .link_mask = BIT(1),
+ .links = acp70_rt721_l1u0_tas2783x2_l1u8b,
+ .drv_name = "amd_sdw",
+ },
{},
};
EXPORT_SYMBOL(snd_soc_acpi_amd_acp70_sdw_machines);
diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c
index 3a20cc10d61f..9751cf0784a6 100644
--- a/sound/soc/amd/ps/pci-ps.c
+++ b/sound/soc/amd/ps/pci-ps.c
@@ -339,7 +339,7 @@ static struct snd_soc_acpi_mach *acp63_sdw_machine_select(struct device *dev)
mach->mach_params.subsystem_device = acp_data->subsystem_device;
mach->mach_params.subsystem_id_set = true;
- dev_dbg(dev, "SSID %x%x\n", mach->mach_params.subsystem_vendor,
+ dev_dbg(dev, "SSID %x%04x\n", mach->mach_params.subsystem_vendor,
mach->mach_params.subsystem_device);
return mach;
}
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 1324543b42d7..aa6200933182 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -48,6 +48,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15-fc0xxx"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5525"),
}
@@ -717,6 +724,27 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "PM1503CDA"),
}
},
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "BM1403CDA"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Thin A15 B7VE"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "M7601RM"),
+ }
+ },
{}
};
diff --git a/sound/soc/cirrus/ep93xx-i2s.c b/sound/soc/cirrus/ep93xx-i2s.c
index cca01c03f048..5dba741594fa 100644
--- a/sound/soc/cirrus/ep93xx-i2s.c
+++ b/sound/soc/cirrus/ep93xx-i2s.c
@@ -91,16 +91,28 @@ static inline unsigned ep93xx_i2s_read_reg(struct ep93xx_i2s_info *info,
return __raw_readl(info->regs + reg);
}
-static void ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream)
+static int ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream)
{
unsigned base_reg;
+ int err;
if ((ep93xx_i2s_read_reg(info, EP93XX_I2S_TX0EN) & 0x1) == 0 &&
(ep93xx_i2s_read_reg(info, EP93XX_I2S_RX0EN) & 0x1) == 0) {
/* Enable clocks */
- clk_prepare_enable(info->mclk);
- clk_prepare_enable(info->sclk);
- clk_prepare_enable(info->lrclk);
+ err = clk_prepare_enable(info->mclk);
+ if (err)
+ return err;
+ err = clk_prepare_enable(info->sclk);
+ if (err) {
+ clk_disable_unprepare(info->mclk);
+ return err;
+ }
+ err = clk_prepare_enable(info->lrclk);
+ if (err) {
+ clk_disable_unprepare(info->sclk);
+ clk_disable_unprepare(info->mclk);
+ return err;
+ }
/* Enable i2s */
ep93xx_i2s_write_reg(info, EP93XX_I2S_GLCTRL, 1);
@@ -119,6 +131,8 @@ static void ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream)
ep93xx_i2s_write_reg(info, EP93XX_I2S_TXCTRL,
EP93XX_I2S_TXCTRL_TXEMPTY_LVL |
EP93XX_I2S_TXCTRL_TXUFIE);
+
+ return 0;
}
static void ep93xx_i2s_disable(struct ep93xx_i2s_info *info, int stream)
@@ -195,9 +209,7 @@ static int ep93xx_i2s_startup(struct snd_pcm_substream *substream,
{
struct ep93xx_i2s_info *info = snd_soc_dai_get_drvdata(dai);
- ep93xx_i2s_enable(info, substream->stream);
-
- return 0;
+ return ep93xx_i2s_enable(info, substream->stream);
}
static void ep93xx_i2s_shutdown(struct snd_pcm_substream *substream,
@@ -373,14 +385,16 @@ static int ep93xx_i2s_suspend(struct snd_soc_component *component)
static int ep93xx_i2s_resume(struct snd_soc_component *component)
{
struct ep93xx_i2s_info *info = snd_soc_component_get_drvdata(component);
+ int err;
if (!snd_soc_component_active(component))
return 0;
- ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_PLAYBACK);
- ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_CAPTURE);
+ err = ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_PLAYBACK);
+ if (err)
+ return err;
- return 0;
+ return ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_CAPTURE);
}
#else
#define ep93xx_i2s_suspend NULL
diff --git a/sound/soc/codecs/adau1372.c b/sound/soc/codecs/adau1372.c
index fdee689cae53..d7363f9d53bb 100644
--- a/sound/soc/codecs/adau1372.c
+++ b/sound/soc/codecs/adau1372.c
@@ -762,7 +762,7 @@ static int adau1372_startup(struct snd_pcm_substream *substream, struct snd_soc_
return 0;
}
-static void adau1372_enable_pll(struct adau1372 *adau1372)
+static int adau1372_enable_pll(struct adau1372 *adau1372)
{
unsigned int val, timeout = 0;
int ret;
@@ -778,19 +778,26 @@ static void adau1372_enable_pll(struct adau1372 *adau1372)
timeout++;
} while (!(val & 1) && timeout < 3);
- if (ret < 0 || !(val & 1))
+ if (ret < 0 || !(val & 1)) {
dev_err(adau1372->dev, "Failed to lock PLL\n");
+ return ret < 0 ? ret : -ETIMEDOUT;
+ }
+
+ return 0;
}
-static void adau1372_set_power(struct adau1372 *adau1372, bool enable)
+static int adau1372_set_power(struct adau1372 *adau1372, bool enable)
{
if (adau1372->enabled == enable)
- return;
+ return 0;
if (enable) {
unsigned int clk_ctrl = ADAU1372_CLK_CTRL_MCLK_EN;
+ int ret;
- clk_prepare_enable(adau1372->mclk);
+ ret = clk_prepare_enable(adau1372->mclk);
+ if (ret)
+ return ret;
if (adau1372->pd_gpio)
gpiod_set_value(adau1372->pd_gpio, 0);
@@ -804,7 +811,14 @@ static void adau1372_set_power(struct adau1372 *adau1372, bool enable)
* accessed.
*/
if (adau1372->use_pll) {
- adau1372_enable_pll(adau1372);
+ ret = adau1372_enable_pll(adau1372);
+ if (ret) {
+ regcache_cache_only(adau1372->regmap, true);
+ if (adau1372->pd_gpio)
+ gpiod_set_value(adau1372->pd_gpio, 1);
+ clk_disable_unprepare(adau1372->mclk);
+ return ret;
+ }
clk_ctrl |= ADAU1372_CLK_CTRL_CLKSRC;
}
@@ -829,6 +843,8 @@ static void adau1372_set_power(struct adau1372 *adau1372, bool enable)
}
adau1372->enabled = enable;
+
+ return 0;
}
static int adau1372_set_bias_level(struct snd_soc_component *component,
@@ -842,11 +858,9 @@ static int adau1372_set_bias_level(struct snd_soc_component *component,
case SND_SOC_BIAS_PREPARE:
break;
case SND_SOC_BIAS_STANDBY:
- adau1372_set_power(adau1372, true);
- break;
+ return adau1372_set_power(adau1372, true);
case SND_SOC_BIAS_OFF:
- adau1372_set_power(adau1372, false);
- break;
+ return adau1372_set_power(adau1372, false);
}
return 0;
diff --git a/sound/soc/codecs/sma1307.c b/sound/soc/codecs/sma1307.c
index 4bb59e5c0891..5850bf6e71ca 100644
--- a/sound/soc/codecs/sma1307.c
+++ b/sound/soc/codecs/sma1307.c
@@ -1759,8 +1759,10 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil
sma1307->set.mode_size * 2 * sizeof(int),
GFP_KERNEL);
if (!sma1307->set.mode_set[i]) {
- for (int j = 0; j < i; j++)
- kfree(sma1307->set.mode_set[j]);
+ for (int j = 0; j < i; j++) {
+ devm_kfree(sma1307->dev, sma1307->set.mode_set[j]);
+ sma1307->set.mode_set[j] = NULL;
+ }
sma1307->set.status = false;
return;
}
diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c
index 5798d518d94c..a1d86bd309f4 100644
--- a/sound/soc/codecs/tas2781-fmwlib.c
+++ b/sound/soc/codecs/tas2781-fmwlib.c
@@ -2550,6 +2550,9 @@ static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i)
int k = i * (cali_data->cali_dat_sz_per_dev + 1);
int rc;
+ if (!data || !cali_data->total_sz)
+ return;
+
if (data[k] != i) {
dev_err(priv->dev, "%s: no cal-data for dev %d from usr-spc\n",
__func__, i);
diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c
index c8db33f78a1b..bc41a1466c70 100644
--- a/sound/soc/codecs/wcd934x.c
+++ b/sound/soc/codecs/wcd934x.c
@@ -2172,7 +2172,7 @@ static int wcd934x_init_dmic(struct snd_soc_component *comp)
u32 def_dmic_rate, dmic_clk_drv;
int ret;
- ret = wcd_dt_parse_mbhc_data(comp->dev, &wcd->mbhc_cfg);
+ ret = wcd_dt_parse_micbias_info(&wcd->common);
if (ret)
return ret;
diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c
index 05b4e971a366..a4518fefad69 100644
--- a/sound/soc/fsl/imx-card.c
+++ b/sound/soc/fsl/imx-card.c
@@ -710,6 +710,8 @@ static int imx_card_parse_of(struct imx_card_data *data)
link->ops = &imx_aif_ops;
}
+ playback_only = false;
+ capture_only = false;
graph_util_parse_link_direction(np, &playback_only, &capture_only);
link->playback_only = playback_only;
link->capture_only = capture_only;
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index 9e5be0eaa77f..89d694c2cbdd 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -1183,9 +1183,9 @@ void graph_util_parse_link_direction(struct device_node *np,
bool is_playback_only = of_property_read_bool(np, "playback-only");
bool is_capture_only = of_property_read_bool(np, "capture-only");
- if (np && playback_only)
+ if (playback_only && is_playback_only)
*playback_only = is_playback_only;
- if (np && capture_only)
+ if (capture_only && is_capture_only)
*capture_only = is_capture_only;
}
EXPORT_SYMBOL_GPL(graph_util_parse_link_direction);
diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig
index c5942b5655d3..d53af8f7e55b 100644
--- a/sound/soc/intel/boards/Kconfig
+++ b/sound/soc/intel/boards/Kconfig
@@ -530,8 +530,6 @@ config SND_SOC_INTEL_SOUNDWIRE_SOF_MACH
select SND_SOC_CS42L43_SDW
select MFD_CS42L43
select MFD_CS42L43_SDW
- select PINCTRL_CS42L43
- select SPI_CS42L43
select SND_SOC_CS35L56_SPI
select SND_SOC_CS35L56_SDW
select SND_SOC_DMIC
diff --git a/sound/soc/intel/boards/ehl_rt5660.c b/sound/soc/intel/boards/ehl_rt5660.c
index 5c7b218f22b7..c40cd9fb1a26 100644
--- a/sound/soc/intel/boards/ehl_rt5660.c
+++ b/sound/soc/intel/boards/ehl_rt5660.c
@@ -127,7 +127,7 @@ static int rt5660_hw_params(struct snd_pcm_substream *substream,
params_rate(params) * 50,
params_rate(params) * 512);
if (ret < 0)
- dev_err(codec_dai->dev, "can't set codec pll: %d\n", ret);
+ dev_err(rtd->dev, "can't set codec pll: %d\n", ret);
return ret;
}
diff --git a/sound/soc/intel/catpt/device.c b/sound/soc/intel/catpt/device.c
index 0638aecba40d..0b3f20e384c7 100644
--- a/sound/soc/intel/catpt/device.c
+++ b/sound/soc/intel/catpt/device.c
@@ -281,7 +281,15 @@ static int catpt_acpi_probe(struct platform_device *pdev)
if (IS_ERR(cdev->pci_ba))
return PTR_ERR(cdev->pci_ba);
- /* alloc buffer for storing DRAM context during dx transitions */
+ /*
+ * As per design HOST is responsible for preserving firmware's runtime
+ * context during D0 -> D3 -> D0 transitions. Addresses used for DMA
+ * to/from HOST memory shall be outside the reserved range of 0xFFFxxxxx.
+ */
+ ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31));
+ if (ret)
+ return ret;
+
cdev->dxbuf_vaddr = dmam_alloc_coherent(dev, catpt_dram_size(cdev),
&cdev->dxbuf_paddr, GFP_KERNEL);
if (!cdev->dxbuf_vaddr)
diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c
index 008a20a2acbd..677f348909c8 100644
--- a/sound/soc/intel/catpt/dsp.c
+++ b/sound/soc/intel/catpt/dsp.c
@@ -125,9 +125,6 @@ int catpt_dmac_probe(struct catpt_dev *cdev)
dmac->dev = cdev->dev;
dmac->irq = cdev->irq;
- ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31));
- if (ret)
- return ret;
/*
* Caller is responsible for putting device in D0 to allow
* for I/O and memory access before probing DW.
diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c
index e0ed593697ba..dca60ee8e62c 100644
--- a/sound/soc/sdca/sdca_functions.c
+++ b/sound/soc/sdca/sdca_functions.c
@@ -216,9 +216,6 @@ static int find_sdca_init_table(struct device *dev,
} else if (num_init_writes % sizeof(*raw) != 0) {
dev_err(dev, "%pfwP: init table size invalid\n", function_node);
return -EINVAL;
- } else if ((num_init_writes / sizeof(*raw)) > SDCA_MAX_INIT_COUNT) {
- dev_err(dev, "%pfwP: maximum init table size exceeded\n", function_node);
- return -EINVAL;
}
raw = kzalloc(num_init_writes, GFP_KERNEL);
@@ -1604,10 +1601,19 @@ static int find_sdca_entities(struct device *dev, struct sdw_slave *sdw,
static struct sdca_entity *find_sdca_entity_by_label(struct sdca_function_data *function,
const char *entity_label)
{
+ struct sdca_entity *entity = NULL;
int i;
for (i = 0; i < function->num_entities; i++) {
- struct sdca_entity *entity = &function->entities[i];
+ entity = &function->entities[i];
+
+ /* check whole string first*/
+ if (!strcmp(entity->label, entity_label))
+ return entity;
+ }
+
+ for (i = 0; i < function->num_entities; i++) {
+ entity = &function->entities[i];
if (!strncmp(entity->label, entity_label, strlen(entity_label)))
return entity;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 573693e21780..ff6eb6bfc63b 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2859,6 +2859,7 @@ int snd_soc_component_initialize(struct snd_soc_component *component,
INIT_LIST_HEAD(&component->dobj_list);
INIT_LIST_HEAD(&component->card_list);
INIT_LIST_HEAD(&component->list);
+ INIT_LIST_HEAD(&component->card_aux_list);
mutex_init(&component->io_mutex);
if (!component->name) {
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index db077e9d5644..c12ffdcfe4e3 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -2950,7 +2950,7 @@ static int sof_ipc4_control_load_bytes(struct snd_sof_dev *sdev, struct snd_sof_
return -EINVAL;
}
- if (scontrol->priv_size < sizeof(struct sof_abi_hdr)) {
+ if (scontrol->priv_size && scontrol->priv_size < sizeof(struct sof_abi_hdr)) {
dev_err(sdev->dev,
"bytes control %s initial data size %zu is insufficient.\n",
scontrol->name, scontrol->priv_size);
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index 18e2401152c8..35200d801fb7 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -736,7 +736,7 @@ static int sof_parse_token_sets(struct snd_soc_component *scomp,
asize = le32_to_cpu(array->size);
/* validate asize */
- if (asize < 0) { /* FIXME: A zero-size array makes no sense */
+ if (asize < sizeof(*array)) {
dev_err(scomp->dev, "error: invalid array size 0x%x\n",
asize);
return -EINVAL;
diff --git a/sound/usb/Kconfig b/sound/usb/Kconfig
index 9b890abd96d3..b4588915efa1 100644
--- a/sound/usb/Kconfig
+++ b/sound/usb/Kconfig
@@ -192,6 +192,7 @@ config SND_USB_AUDIO_QMI
tristate "Qualcomm Audio Offload driver"
depends on QCOM_QMI_HELPERS && SND_USB_AUDIO && SND_SOC_USB
depends on USB_XHCI_HCD && USB_XHCI_SIDEBAND
+ select AUXILIARY_BUS
help
Say Y here to enable the Qualcomm USB audio offloading feature.
diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c
index dfd820483849..3a71bab8a477 100644
--- a/sound/usb/caiaq/device.c
+++ b/sound/usb/caiaq/device.c
@@ -488,7 +488,7 @@ static int init_card(struct snd_usb_caiaqdev *cdev)
memset(id, 0, sizeof(id));
for (c = card->shortname, len = 0;
- *c && len < sizeof(card->id); c++)
+ *c && len < sizeof(card->id) - 1; c++)
if (*c != ' ')
id[len++] = *c;
diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c
index 510b68cced33..f161eb29f911 100644
--- a/sound/usb/qcom/qc_audio_offload.c
+++ b/sound/usb/qcom/qc_audio_offload.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
*/
#include <linux/auxiliary_bus.h>
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 049a94079f9e..4cfa24c06fcd 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2148,6 +2148,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
/* Device matches */
DEVICE_FLG(0x001f, 0x0b21, /* AB13X USB Audio */
QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY),
+ DEVICE_FLG(0x001f, 0x0b23, /* AB17X USB Audio */
+ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY),
DEVICE_FLG(0x0020, 0x0b21, /* GHW-123P */
QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY),
DEVICE_FLG(0x03f0, 0x654a, /* HP 320 FHD Webcam */
@@ -2303,6 +2305,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB),
DEVICE_FLG(0x13e5, 0x0001, /* Serato Phono */
QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x152a, 0x880a, /* NeuralDSP Quad Cortex */
+ 0), /* Doesn't have the vendor quirk which would otherwise apply */
DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */
QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY),
DEVICE_FLG(0x154e, 0x1003, /* Denon DA-300USB */
@@ -2429,6 +2433,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_IFACE_DELAY),
VENDOR_FLG(0x07fd, /* MOTU */
QUIRK_FLAG_VALIDATE_RATES),
+ DEVICE_FLG(0x1235, 0x8006, 0), /* Focusrite Scarlett 2i2 1st Gen */
+ DEVICE_FLG(0x1235, 0x800a, 0), /* Focusrite Scarlett 2i4 1st Gen */
+ DEVICE_FLG(0x1235, 0x8016, 0), /* Focusrite Scarlett 2i2 1st Gen */
+ DEVICE_FLG(0x1235, 0x801c, 0), /* Focusrite Scarlett Solo 1st Gen */
VENDOR_FLG(0x1235, /* Focusrite Novation */
QUIRK_FLAG_SKIP_CLOCK_SELECTOR |
QUIRK_FLAG_SKIP_IFACE_SETUP),
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index da5275d8eda6..6673601246b3 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -740,7 +740,10 @@
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
-#define MSR_AMD64_SNP_RESV_BIT 19
+#define MSR_AMD64_SNP_RESERVED_BITS19_22 GENMASK_ULL(22, 19)
+#define MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT 23
+#define MSR_AMD64_SNP_IBPB_ON_ENTRY BIT_ULL(MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 24
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
#define MSR_AMD64_SAVIC_EN_BIT 0
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 846a63215ce1..0d4538fa6c31 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -476,6 +476,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9)
+#define KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM (1 << 10)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h
index ab2aa97bd8ce..406923bd4846 100644
--- a/tools/include/linux/build_bug.h
+++ b/tools/include/linux/build_bug.h
@@ -32,7 +32,8 @@
/**
* BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
* error message.
- * @condition: the condition which the compiler should know is false.
+ * @cond: the condition which the compiler should know is false.
+ * @msg: build-time error message
*
* See BUILD_BUG_ON for description.
*/
@@ -60,6 +61,7 @@
/**
* static_assert - check integer constant expression at build time
+ * @expr: expression to be checked
*
* static_assert() is a wrapper for the C11 _Static_assert, with a
* little macro magic to make the message optional (defaulting to the
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 65500f5db379..80364d4dbebb 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -14,6 +14,10 @@
#include <linux/ioctl.h>
#include <asm/kvm.h>
+#ifdef __KERNEL__
+#include <linux/kvm_types.h>
+#endif
+
#define KVM_API_VERSION 12
/*
@@ -1601,7 +1605,11 @@ struct kvm_stats_desc {
__u16 size;
__u32 offset;
__u32 bucket_size;
+#ifdef __KERNEL__
+ char name[KVM_STATS_NAME_SIZE];
+#else
char name[];
+#endif
};
#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index da3aca87457f..31826621eebd 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -187,7 +187,6 @@ done
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"'
-check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
diff --git a/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c b/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c
index 43275d25b6cb..0f626db3a439 100644
--- a/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c
+++ b/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c
@@ -4,9 +4,9 @@
#include "../kvm-stat.h"
#include "../evsel.h"
#include "../env.h"
-#include "../../arch/x86/include/uapi/asm/svm.h"
-#include "../../arch/x86/include/uapi/asm/vmx.h"
-#include "../../arch/x86/include/uapi/asm/kvm.h"
+#include "../../../arch/x86/include/uapi/asm/svm.h"
+#include "../../../arch/x86/include/uapi/asm/vmx.h"
+#include "../../../arch/x86/include/uapi/asm/kvm.h"
#include <subcmd/parse-options.h>
define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 46bf4dfeebc8..7e39d469111b 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1605,9 +1605,9 @@ bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_gr
.metric_or_groups = metric_or_groups,
};
- return pmu_metrics_table__for_each_metric(table,
- metricgroup__has_metric_or_groups_callback,
- &data)
+ return metricgroup__for_each_metric(table,
+ metricgroup__has_metric_or_groups_callback,
+ &data)
? true : false;
}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index b9efb296bba5..7b4629625b1e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1117,7 +1117,7 @@ static int config_attr(struct perf_event_attr *attr,
static struct evsel_config_term *add_config_term(enum evsel_term_type type,
struct list_head *head_terms,
- bool weak)
+ bool weak, char *str, u64 val)
{
struct evsel_config_term *t;
@@ -1128,8 +1128,62 @@ static struct evsel_config_term *add_config_term(enum evsel_term_type type,
INIT_LIST_HEAD(&t->list);
t->type = type;
t->weak = weak;
- list_add_tail(&t->list, head_terms);
+ switch (type) {
+ case EVSEL__CONFIG_TERM_PERIOD:
+ case EVSEL__CONFIG_TERM_FREQ:
+ case EVSEL__CONFIG_TERM_STACK_USER:
+ case EVSEL__CONFIG_TERM_USR_CHG_CONFIG:
+ case EVSEL__CONFIG_TERM_USR_CHG_CONFIG1:
+ case EVSEL__CONFIG_TERM_USR_CHG_CONFIG2:
+ case EVSEL__CONFIG_TERM_USR_CHG_CONFIG3:
+ case EVSEL__CONFIG_TERM_USR_CHG_CONFIG4:
+ t->val.val = val;
+ break;
+ case EVSEL__CONFIG_TERM_TIME:
+ t->val.time = val;
+ break;
+ case EVSEL__CONFIG_TERM_INHERIT:
+ t->val.inherit = val;
+ break;
+ case EVSEL__CONFIG_TERM_OVERWRITE:
+ t->val.overwrite = val;
+ break;
+ case EVSEL__CONFIG_TERM_MAX_STACK:
+ t->val.max_stack = val;
+ break;
+ case EVSEL__CONFIG_TERM_MAX_EVENTS:
+ t->val.max_events = val;
+ break;
+ case EVSEL__CONFIG_TERM_PERCORE:
+ t->val.percore = val;
+ break;
+ case EVSEL__CONFIG_TERM_AUX_OUTPUT:
+ t->val.aux_output = val;
+ break;
+ case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
+ t->val.aux_sample_size = val;
+ break;
+ case EVSEL__CONFIG_TERM_CALLGRAPH:
+ case EVSEL__CONFIG_TERM_BRANCH:
+ case EVSEL__CONFIG_TERM_DRV_CFG:
+ case EVSEL__CONFIG_TERM_RATIO_TO_PREV:
+ case EVSEL__CONFIG_TERM_AUX_ACTION:
+ if (str) {
+ t->val.str = strdup(str);
+ if (!t->val.str) {
+ zfree(&t);
+ return NULL;
+ }
+ t->free_str = true;
+ }
+ break;
+ default:
+ t->val.val = val;
+ break;
+ }
+
+ list_add_tail(&t->list, head_terms);
return t;
}
@@ -1142,7 +1196,7 @@ static int get_config_terms(const struct parse_events_terms *head_config,
struct evsel_config_term *new_term;
enum evsel_term_type new_type;
bool str_type = false;
- u64 val;
+ u64 val = 0;
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
@@ -1234,20 +1288,15 @@ static int get_config_terms(const struct parse_events_terms *head_config,
continue;
}
- new_term = add_config_term(new_type, head_terms, term->weak);
+ /*
+ * Note: Members evsel_config_term::val and
+ * parse_events_term::val are unions and endianness needs
+ * to be taken into account when changing such union members.
+ */
+ new_term = add_config_term(new_type, head_terms, term->weak,
+ str_type ? term->val.str : NULL, val);
if (!new_term)
return -ENOMEM;
-
- if (str_type) {
- new_term->val.str = strdup(term->val.str);
- if (!new_term->val.str) {
- zfree(&new_term);
- return -ENOMEM;
- }
- new_term->free_str = true;
- } else {
- new_term->val.val = val;
- }
}
return 0;
}
@@ -1277,10 +1326,9 @@ static int add_cfg_chg(const struct perf_pmu *pmu,
if (bits) {
struct evsel_config_term *new_term;
- new_term = add_config_term(new_term_type, head_terms, false);
+ new_term = add_config_term(new_term_type, head_terms, false, NULL, bits);
if (!new_term)
return -ENOMEM;
- new_term->val.cfg_chg = bits;
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 1fe090cd6744..4794903aec8e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -5,6 +5,13 @@
#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} precision_map SEC(".maps");
+
SEC("?raw_tp")
__success __log_level(2)
__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
@@ -301,4 +308,338 @@ __naked int bpf_neg_5(void)
::: __clobber_all);
}
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_add((u64 *)(r10 -8), r2)")
+__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_atomic_fetch_add_precision(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = 0;"
+ ".8byte %[fetch_add_insn];" /* r2 = atomic_fetch_add(*(u64 *)(r10 - 8), r2) */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(fetch_add_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_xchg((u64 *)(r10 -8), r2)")
+__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_atomic_xchg_precision(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = 0;"
+ ".8byte %[xchg_insn];" /* r2 = atomic_xchg(*(u64 *)(r10 - 8), r2) */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(xchg_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_or((u64 *)(r10 -8), r2)")
+__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_atomic_fetch_or_precision(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = 0;"
+ ".8byte %[fetch_or_insn];" /* r2 = atomic_fetch_or(*(u64 *)(r10 - 8), r2) */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(fetch_or_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_and((u64 *)(r10 -8), r2)")
+__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_atomic_fetch_and_precision(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = 0;"
+ ".8byte %[fetch_and_insn];" /* r2 = atomic_fetch_and(*(u64 *)(r10 - 8), r2) */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(fetch_and_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_xor((u64 *)(r10 -8), r2)")
+__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_atomic_fetch_xor_precision(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = 0;"
+ ".8byte %[fetch_xor_insn];" /* r2 = atomic_fetch_xor(*(u64 *)(r10 - 8), r2) */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(fetch_xor_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_XOR | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r2)")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (b7) r2 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r0 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_atomic_cmpxchg_precision(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r0 = 0;"
+ "r2 = 0;"
+ ".8byte %[cmpxchg_insn];" /* r0 = atomic_cmpxchg(*(u64 *)(r10 - 8), r0, r2) */
+ "r3 = r10;"
+ "r3 += r0;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(cmpxchg_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+/* Regression test for dual precision: Both the fetched value (r2) and
+ * a reread of the same stack slot (r3) are tracked for precision. After
+ * the atomic operation, the stack slot is STACK_MISC. Thus, the ldx at
+ * insn 4 does NOT set INSN_F_STACK_ACCESS. Precision for the stack slot
+ * propagates solely through the atomic fetch's load side (insn 3).
+ */
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2,r3 stack= before 4: (79) r3 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_add((u64 *)(r10 -8), r2)")
+__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_atomic_fetch_add_dual_precision(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = 0;"
+ ".8byte %[fetch_add_insn];" /* r2 = atomic_fetch_add(*(u64 *)(r10 - 8), r2) */
+ "r3 = *(u64 *)(r10 - 8);"
+ "r4 = r2;"
+ "r4 += r3;"
+ "r4 &= 7;"
+ "r5 = r10;"
+ "r5 += r4;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(fetch_add_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r0,r3 stack= before 5: (79) r3 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r2)")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (b7) r2 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r0 = 8")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_atomic_cmpxchg_dual_precision(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r0 = 8;"
+ "r2 = 0;"
+ ".8byte %[cmpxchg_insn];" /* r0 = atomic_cmpxchg(*(u64 *)(r10 - 8), r0, r2) */
+ "r3 = *(u64 *)(r10 - 8);"
+ "r4 = r0;"
+ "r4 += r3;"
+ "r4 &= 7;"
+ "r5 = r10;"
+ "r5 += r4;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(cmpxchg_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r1 stack= before 10: (57) r1 &= 7")
+__msg("mark_precise: frame0: regs=r1 stack= before 9: (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)")
+__not_msg("falling back to forcing all scalars precise")
+__naked int bpf_atomic_fetch_add_map_precision(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[precision_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r1 = 0;"
+ ".8byte %[fetch_add_insn];" /* r1 = atomic_fetch_add(*(u64 *)(r0 + 0), r1) */
+ "r1 &= 7;"
+ "r2 = r10;"
+ "r2 += r1;" /* mark_precise */
+ "1: r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(precision_map),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(fetch_add_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r0 stack= before 12: (57) r0 &= 7")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (db) r0 = atomic64_cmpxchg((u64 *)(r6 +0), r0, r1)")
+__not_msg("falling back to forcing all scalars precise")
+__naked int bpf_atomic_cmpxchg_map_precision(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[precision_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r6 = r0;"
+ "r0 = 0;"
+ "r1 = 0;"
+ ".8byte %[cmpxchg_insn];" /* r0 = atomic_cmpxchg(*(u64 *)(r6 + 0), r0, r1) */
+ "r0 &= 7;"
+ "r2 = r10;"
+ "r2 += r0;" /* mark_precise */
+ "1: r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(precision_map),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(cmpxchg_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_6, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r1 stack= before 10: (57) r1 &= 7")
+__msg("mark_precise: frame0: regs=r1 stack= before 9: (c3) r1 = atomic_fetch_add((u32 *)(r0 +0), r1)")
+__not_msg("falling back to forcing all scalars precise")
+__naked int bpf_atomic_fetch_add_32bit_precision(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[precision_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r1 = 0;"
+ ".8byte %[fetch_add_insn];" /* r1 = atomic_fetch_add(*(u32 *)(r0 + 0), r1) */
+ "r1 &= 7;"
+ "r2 = r10;"
+ "r2 += r1;" /* mark_precise */
+ "1: r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(precision_map),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(fetch_add_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_ADD | BPF_FETCH, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r0 stack= before 12: (57) r0 &= 7")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (c3) r0 = atomic_cmpxchg((u32 *)(r6 +0), r0, r1)")
+__not_msg("falling back to forcing all scalars precise")
+__naked int bpf_atomic_cmpxchg_32bit_precision(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[precision_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r6 = r0;"
+ "r0 = 0;"
+ "r1 = 0;"
+ ".8byte %[cmpxchg_insn];" /* r0 = atomic_cmpxchg(*(u32 *)(r6 + 0), r0, r1) */
+ "r0 &= 7;"
+ "r2 = r10;"
+ "r2 += r0;" /* mark_precise */
+ "1: r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(precision_map),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(cmpxchg_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_6, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index ce6c2642fd9b..6a7295347e90 100644
--- a/tools/testing/selftests/cgroup/lib/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -123,6 +123,21 @@ int cg_read_strcmp(const char *cgroup, const char *control,
return ret;
}
+int cg_read_strcmp_wait(const char *cgroup, const char *control,
+ const char *expected)
+{
+ int i, ret;
+
+ for (i = 0; i < 100; i++) {
+ ret = cg_read_strcmp(cgroup, control, expected);
+ if (!ret)
+ return ret;
+ usleep(10000);
+ }
+
+ return ret;
+}
+
int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
{
char buf[PAGE_SIZE];
diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 77f386dab5e8..567b1082974c 100644
--- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -61,6 +61,8 @@ extern int cg_read(const char *cgroup, const char *control,
char *buf, size_t len);
extern int cg_read_strcmp(const char *cgroup, const char *control,
const char *expected);
+extern int cg_read_strcmp_wait(const char *cgroup, const char *control,
+ const char *expected);
extern int cg_read_strstr(const char *cgroup, const char *control,
const char *needle);
extern long cg_read_long(const char *cgroup, const char *control);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 102262555a59..7b83c7e7c9d4 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -233,7 +233,8 @@ static int test_cgcore_populated(const char *root)
if (err)
goto cleanup;
- if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+ if (cg_read_strcmp_wait(cg_test_d, "cgroup.events",
+ "populated 0\n"))
goto cleanup;
/* Remove cgroup. */
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
index c8c9d306925b..f6cd23a8ecc7 100644
--- a/tools/testing/selftests/cgroup/test_kill.c
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -86,7 +86,7 @@ cleanup:
wait_for_pid(pids[i]);
if (ret == KSFT_PASS &&
- cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+ cg_read_strcmp_wait(cgroup, "cgroup.events", "populated 0\n"))
ret = KSFT_FAIL;
if (cgroup)
@@ -190,7 +190,8 @@ cleanup:
wait_for_pid(pids[i]);
if (ret == KSFT_PASS &&
- cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n"))
+ cg_read_strcmp_wait(cgroup[0], "cgroup.events",
+ "populated 0\n"))
ret = KSFT_FAIL;
for (i = 9; i >= 0 && cgroup[i]; i--) {
@@ -251,7 +252,7 @@ cleanup:
wait_for_pid(pid);
if (ret == KSFT_PASS &&
- cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+ cg_read_strcmp_wait(cgroup, "cgroup.events", "populated 0\n"))
ret = KSFT_FAIL;
if (cgroup)
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 45a3e7ad3dcb..02d6f51d5a06 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -3,6 +3,7 @@
TEST_PROGS := \
dev_addr_lists.sh \
+ non_ether_header_ops.sh \
options.sh \
propagation.sh \
refleak.sh \
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
index 558e1d0cf565..5d36a22ef080 100644
--- a/tools/testing/selftests/drivers/net/team/config
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,7 +1,9 @@
+CONFIG_BONDING=y
CONFIG_DUMMY=y
CONFIG_IPV6=y
CONFIG_MACVLAN=y
CONFIG_NETDEVSIM=m
+CONFIG_NET_IPGRE=y
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh
new file mode 100755
index 000000000000..948a43576bdc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2154
+#
+# Reproduce the non-Ethernet header_ops confusion scenario with:
+# g0 (gre) -> b0 (bond) -> t0 (team)
+#
+# Before the fix, direct header_ops inheritance in this stack could call
+# callbacks with the wrong net_device context and crash.
+
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/lib.sh
+
+trap cleanup_all_ns EXIT
+
+setup_ns ns1
+
+ip -n "$ns1" link add d0 type dummy
+ip -n "$ns1" addr add 10.10.10.1/24 dev d0
+ip -n "$ns1" link set d0 up
+
+ip -n "$ns1" link add g0 type gre local 10.10.10.1
+ip -n "$ns1" link add b0 type bond mode active-backup
+ip -n "$ns1" link add t0 type team
+
+ip -n "$ns1" link set g0 master b0
+ip -n "$ns1" link set b0 master t0
+
+ip -n "$ns1" link set g0 up
+ip -n "$ns1" link set b0 up
+ip -n "$ns1" link set t0 up
+
+# IPv6 address assignment triggers MLD join reports that call
+# dev_hard_header() on t0, exercising the inherited header_ops path.
+ip -n "$ns1" -6 addr add 2001:db8:1::1/64 dev t0 nodad
+for i in $(seq 1 20); do
+ ip netns exec "$ns1" ping -6 -I t0 ff02::1 -c1 -W1 &>/dev/null || true
+done
+
+echo "PASS: non-Ethernet header_ops stacking did not crash"
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index dc68371f76a3..6471fa214a9f 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -206,6 +206,7 @@ TEST_GEN_PROGS_s390 += s390/ucontrol_test
TEST_GEN_PROGS_s390 += s390/user_operexec
TEST_GEN_PROGS_s390 += s390/keyop
TEST_GEN_PROGS_s390 += rseq_test
+TEST_GEN_PROGS_s390 += s390/irq_routing
TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
diff --git a/tools/testing/selftests/kvm/s390/irq_routing.c b/tools/testing/selftests/kvm/s390/irq_routing.c
new file mode 100644
index 000000000000..7819a0af19a8
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/irq_routing.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IRQ routing offset tests.
+ *
+ * Copyright IBM Corp. 2026
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+extern char guest_code[];
+asm("guest_code:\n"
+ "diag %r0,%r0,0\n"
+ "j .\n");
+
+static void test(void)
+{
+ struct kvm_irq_routing *routing;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_paddr_t mem;
+ int ret;
+
+ struct kvm_irq_routing_entry ue = {
+ .type = KVM_IRQ_ROUTING_S390_ADAPTER,
+ .gsi = 1,
+ };
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ mem = vm_phy_pages_alloc(vm, 2, 4096 * 42, 0);
+
+ routing = kvm_gsi_routing_create();
+ routing->nr = 1;
+ routing->entries[0] = ue;
+ routing->entries[0].u.adapter.summary_addr = (uintptr_t)mem;
+ routing->entries[0].u.adapter.ind_addr = (uintptr_t)mem;
+
+ routing->entries[0].u.adapter.summary_offset = 4096 * 8;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == -1 && errno == EINVAL, "summary offset outside of page\n");
+
+ routing->entries[0].u.adapter.summary_offset -= 4;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == 0, "summary offset inside of page\n");
+
+ routing->entries[0].u.adapter.ind_offset = 4096 * 8;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == -1 && errno == EINVAL, "ind offset outside of page\n");
+
+ routing->entries[0].u.adapter.ind_offset -= 4;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == 0, "ind offset inside of page\n");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_IRQ_ROUTING));
+
+ ksft_print_header();
+ ksft_set_plan(4);
+ test();
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c
index 37ef0d2270db..2b9ad4f154f4 100644
--- a/tools/testing/selftests/landlock/tsync_test.c
+++ b/tools/testing/selftests/landlock/tsync_test.c
@@ -6,9 +6,10 @@
*/
#define _GNU_SOURCE
+#include <linux/landlock.h>
#include <pthread.h>
+#include <signal.h>
#include <sys/prctl.h>
-#include <linux/landlock.h>
#include "common.h"
@@ -158,4 +159,92 @@ TEST(competing_enablement)
EXPECT_EQ(0, close(ruleset_fd));
}
+static void signal_nop_handler(int sig)
+{
+}
+
+struct signaler_data {
+ pthread_t target;
+ volatile bool stop;
+};
+
+static void *signaler_thread(void *data)
+{
+ struct signaler_data *sd = data;
+
+ while (!sd->stop)
+ pthread_kill(sd->target, SIGUSR1);
+
+ return NULL;
+}
+
+/*
+ * Number of idle sibling threads. This must be large enough that even on
+ * machines with many cores, the sibling threads cannot all complete their
+ * credential preparation in a single parallel wave, otherwise the signaler
+ * thread has no window to interrupt wait_for_completion_interruptible().
+ * 200 threads on a 64-core machine yields ~3 serialized waves, giving the
+ * tight signal loop enough time to land an interruption.
+ */
+#define NUM_IDLE_THREADS 200
+
+/*
+ * Exercises the tsync interruption and cancellation paths in tsync.c.
+ *
+ * When a signal interrupts the calling thread while it waits for sibling
+ * threads to finish their credential preparation
+ * (wait_for_completion_interruptible in landlock_restrict_sibling_threads),
+ * the kernel sets ERESTARTNOINTR, cancels queued task works that have not
+ * started yet (cancel_tsync_works), then waits for the remaining works to
+ * finish. On the error return, syscalls.c aborts the prepared credentials.
+ * The kernel automatically restarts the syscall, so userspace sees success.
+ */
+TEST(tsync_interrupt)
+{
+ size_t i;
+ pthread_t threads[NUM_IDLE_THREADS];
+ pthread_t signaler;
+ struct signaler_data sd;
+ struct sigaction sa = {};
+ const int ruleset_fd = create_ruleset(_metadata);
+
+ disable_caps(_metadata);
+
+ /* Install a no-op SIGUSR1 handler so the signal does not kill us. */
+ sa.sa_handler = signal_nop_handler;
+ sigemptyset(&sa.sa_mask);
+ ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL));
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ for (i = 0; i < NUM_IDLE_THREADS; i++)
+ ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL));
+
+ /*
+ * Start a signaler thread that continuously sends SIGUSR1 to the
+ * calling thread. This maximizes the chance of interrupting
+ * wait_for_completion_interruptible() in the kernel's tsync path.
+ */
+ sd.target = pthread_self();
+ sd.stop = false;
+ ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd));
+
+ /*
+ * The syscall may be interrupted and transparently restarted by the
+ * kernel (ERESTARTNOINTR). From userspace, it should always succeed.
+ */
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd,
+ LANDLOCK_RESTRICT_SELF_TSYNC));
+
+ sd.stop = true;
+ ASSERT_EQ(0, pthread_join(signaler, NULL));
+
+ for (i = 0; i < NUM_IDLE_THREADS; i++) {
+ ASSERT_EQ(0, pthread_cancel(threads[i]));
+ ASSERT_EQ(0, pthread_join(threads[i], NULL));
+ }
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 7aec3ae82a44..c6dafb3cc116 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -1020,7 +1020,7 @@ FIXTURE_SETUP(mount_setattr_idmapped)
"size=100000,mode=700"), 0);
ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
- "size=2m,mode=700"), 0);
+ "size=256m,mode=700"), 0);
ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index c5694cc4ddd2..829f72c8ee07 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -868,6 +868,64 @@ fib6_gc_test()
check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l)
log_test $ret 0 "ipv6 route garbage collection (replace with permanent)"
+ # Delete dummy_10 and remove all routes
+ $IP link del dev dummy_10
+
+ # rd6 is required for the next test. (ipv6toolkit)
+ if [ ! -x "$(command -v rd6)" ]; then
+ echo "SKIP: rd6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
+ fi
+
+ setup_ns ns2
+ $IP link add veth1 type veth peer veth2 netns $ns2
+ $IP link set veth1 up
+ ip -netns $ns2 link set veth2 up
+ $IP addr add fe80:dead::1/64 dev veth1
+ ip -netns $ns2 addr add fe80:dead::2/64 dev veth2
+
+ # Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception()
+ # from removing not-yet-expired exceptions.
+ ip -netns $ns2 link set veth2 address 00:11:22:33:44:55
+ $IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router
+
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0
+
+ # Temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route add 2001:10::$i \
+ via fe80:dead::2 dev veth1 expires $EXPIRE
+
+ ip netns exec $ns2 rd6 -i veth2 \
+ -s fe80:dead::2 -d fe80:dead::1 \
+ -r 2001:10::$i -t fe80:dead::3 -p ICMP6
+ done
+
+ check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l)
+
+ # Promote to permanent routes by "prepend" (w/o NLM_F_EXCL and NLM_F_REPLACE)
+ for i in $(seq 1 5); do
+ # -EEXIST, but the temporary route becomes the permanent route.
+ $IP -6 route append 2001:10::$i \
+ via fe80:dead::2 dev veth1 2>/dev/null || true
+ done
+
+ check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l)
+ check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l)
+
+ # Trigger GC instead of waiting $GC_WAIT_TIME.
+ # rt6_nh_dump_exceptions() just skips expired exceptions.
+ $NS_EXEC sysctl -wq net.ipv6.route.flush=1
+ check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)"
+
+ $IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router
+ $IP link del veth1
+
# ra6 is required for the next test. (ipv6toolkit)
if [ ! -x "$(command -v ra6)" ]; then
echo "SKIP: ra6 not found."
@@ -876,9 +934,6 @@ fib6_gc_test()
return
fi
- # Delete dummy_10 and remove all routes
- $IP link del dev dummy_10
-
# Create a pair of veth devices to send a RA message from one
# device to another.
$IP link add veth1 type veth peer name veth2
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 394166f224a4..ffdc6ccc6511 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -29,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate
+ insert_overlap load_flush_load4 load_flush_load8"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -432,6 +433,30 @@ race_repeat 0
perf_duration 0
"
+TYPE_load_flush_load4="
+display reload with flush, 4bit groups
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_load_flush_load8="
+display reload with flush, 8bit groups
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1997,6 +2022,49 @@ test_bug_insert_overlap()
return 0
}
+test_bug_load_flush_load4()
+{
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ for i in $(seq 0 255); do
+ local addelem="add element inet filter test"
+ local j
+
+ for j in $(seq 0 20); do
+ echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }"
+ echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }"
+ done
+ done > "$tmp"
+
+ nft -f "$tmp" || return 1
+
+ ( echo "flush set inet filter test";cat "$tmp") | nft -f -
+ [ $? -eq 0 ] || return 1
+
+ return 0
+}
+
+test_bug_load_flush_load8()
+{
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ for i in $(seq 1 100); do
+ echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }"
+ echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }"
+ done > "$tmp"
+
+ nft -f "$tmp" || return 1
+
+ ( echo "flush set inet filter test";cat "$tmp") | nft -f -
+ [ $? -eq 0 ] || return 1
+
+ return 0
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 006300ac6dff..1c9ca328cca1 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -188,6 +188,7 @@ auto-test-targets := \
rt_stall \
test_example \
total_bw \
+ cyclic_kick_wait \
testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))
diff --git a/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c
new file mode 100644
index 000000000000..cb34d3335917
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Stress concurrent SCX_KICK_WAIT calls to reproduce wait-cycle deadlock.
+ *
+ * Three CPUs are designated from userspace. Every enqueue from one of the
+ * three CPUs kicks the next CPU in the ring with SCX_KICK_WAIT, creating a
+ * persistent A -> B -> C -> A wait cycle pressure.
+ */
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+const volatile s32 test_cpu_a;
+const volatile s32 test_cpu_b;
+const volatile s32 test_cpu_c;
+
+u64 nr_enqueues;
+u64 nr_wait_kicks;
+
+UEI_DEFINE(uei);
+
+static s32 target_cpu(s32 cpu)
+{
+ if (cpu == test_cpu_a)
+ return test_cpu_b;
+ if (cpu == test_cpu_b)
+ return test_cpu_c;
+ if (cpu == test_cpu_c)
+ return test_cpu_a;
+ return -1;
+}
+
+void BPF_STRUCT_OPS(cyclic_kick_wait_enqueue, struct task_struct *p,
+ u64 enq_flags)
+{
+ s32 this_cpu = bpf_get_smp_processor_id();
+ s32 tgt;
+
+ __sync_fetch_and_add(&nr_enqueues, 1);
+
+ if (p->flags & PF_KTHREAD) {
+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF,
+ enq_flags | SCX_ENQ_PREEMPT);
+ return;
+ }
+
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+
+ tgt = target_cpu(this_cpu);
+ if (tgt < 0 || tgt == this_cpu)
+ return;
+
+ __sync_fetch_and_add(&nr_wait_kicks, 1);
+ scx_bpf_kick_cpu(tgt, SCX_KICK_WAIT);
+}
+
+void BPF_STRUCT_OPS(cyclic_kick_wait_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops cyclic_kick_wait_ops = {
+ .enqueue = cyclic_kick_wait_enqueue,
+ .exit = cyclic_kick_wait_exit,
+ .name = "cyclic_kick_wait",
+ .timeout_ms = 1000U,
+};
diff --git a/tools/testing/selftests/sched_ext/cyclic_kick_wait.c b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c
new file mode 100644
index 000000000000..c2e5aa9de715
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Test SCX_KICK_WAIT forward progress under cyclic wait pressure.
+ *
+ * SCX_KICK_WAIT busy-waits until the target CPU enters the scheduling path.
+ * If multiple CPUs form a wait cycle (A waits for B, B waits for C, C waits
+ * for A), all CPUs deadlock unless the implementation breaks the cycle.
+ *
+ * This test creates that scenario: three CPUs are arranged in a ring. The BPF
+ * scheduler's ops.enqueue() kicks the next CPU in the ring with SCX_KICK_WAIT
+ * on every enqueue. Userspace pins 4 worker threads per CPU that loop calling
+ * sched_yield(), generating a steady stream of enqueues and thus sustained
+ * A->B->C->A kick_wait cycle pressure. The test passes if the system remains
+ * responsive for 5 seconds without the scheduler being killed by the watchdog.
+ */
+#define _GNU_SOURCE
+
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <scx/common.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "scx_test.h"
+#include "cyclic_kick_wait.bpf.skel.h"
+
+#define WORKERS_PER_CPU 4
+#define NR_TEST_CPUS 3
+#define NR_WORKERS (NR_TEST_CPUS * WORKERS_PER_CPU)
+
+struct worker_ctx {
+ pthread_t tid;
+ int cpu;
+ volatile bool stop;
+ volatile __u64 iters;
+ bool started;
+};
+
+static void *worker_fn(void *arg)
+{
+ struct worker_ctx *worker = arg;
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(worker->cpu, &mask);
+
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ return (void *)(uintptr_t)errno;
+
+ while (!worker->stop) {
+ sched_yield();
+ worker->iters++;
+ }
+
+ return NULL;
+}
+
+static int join_worker(struct worker_ctx *worker)
+{
+ void *ret;
+ struct timespec ts;
+ int err;
+
+ if (!worker->started)
+ return 0;
+
+ if (clock_gettime(CLOCK_REALTIME, &ts))
+ return -errno;
+
+ ts.tv_sec += 2;
+ err = pthread_timedjoin_np(worker->tid, &ret, &ts);
+ if (err == ETIMEDOUT)
+ pthread_detach(worker->tid);
+ if (err)
+ return -err;
+
+ if ((uintptr_t)ret)
+ return -(int)(uintptr_t)ret;
+
+ return 0;
+}
+
+static enum scx_test_status setup(void **ctx)
+{
+ struct cyclic_kick_wait *skel;
+
+ skel = cyclic_kick_wait__open();
+ SCX_FAIL_IF(!skel, "Failed to open skel");
+ SCX_ENUM_INIT(skel);
+
+ *ctx = skel;
+ return SCX_TEST_PASS;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+ struct cyclic_kick_wait *skel = ctx;
+ struct worker_ctx workers[NR_WORKERS] = {};
+ struct bpf_link *link = NULL;
+ enum scx_test_status status = SCX_TEST_PASS;
+ int test_cpus[NR_TEST_CPUS];
+ int nr_cpus = 0;
+ cpu_set_t mask;
+ int ret, i;
+
+ if (sched_getaffinity(0, sizeof(mask), &mask)) {
+ SCX_ERR("Failed to get affinity (%d)", errno);
+ return SCX_TEST_FAIL;
+ }
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &mask))
+ test_cpus[nr_cpus++] = i;
+ if (nr_cpus == NR_TEST_CPUS)
+ break;
+ }
+
+ if (nr_cpus < NR_TEST_CPUS)
+ return SCX_TEST_SKIP;
+
+ skel->rodata->test_cpu_a = test_cpus[0];
+ skel->rodata->test_cpu_b = test_cpus[1];
+ skel->rodata->test_cpu_c = test_cpus[2];
+
+ if (cyclic_kick_wait__load(skel)) {
+ SCX_ERR("Failed to load skel");
+ return SCX_TEST_FAIL;
+ }
+
+ link = bpf_map__attach_struct_ops(skel->maps.cyclic_kick_wait_ops);
+ if (!link) {
+ SCX_ERR("Failed to attach scheduler");
+ return SCX_TEST_FAIL;
+ }
+
+ for (i = 0; i < NR_WORKERS; i++)
+ workers[i].cpu = test_cpus[i / WORKERS_PER_CPU];
+
+ for (i = 0; i < NR_WORKERS; i++) {
+ ret = pthread_create(&workers[i].tid, NULL, worker_fn, &workers[i]);
+ if (ret) {
+ SCX_ERR("Failed to create worker thread %d (%d)", i, ret);
+ status = SCX_TEST_FAIL;
+ goto out;
+ }
+ workers[i].started = true;
+ }
+
+ sleep(5);
+
+ if (skel->data->uei.kind != EXIT_KIND(SCX_EXIT_NONE)) {
+ SCX_ERR("Scheduler exited unexpectedly (kind=%llu code=%lld)",
+ (unsigned long long)skel->data->uei.kind,
+ (long long)skel->data->uei.exit_code);
+ status = SCX_TEST_FAIL;
+ }
+
+out:
+ for (i = 0; i < NR_WORKERS; i++)
+ workers[i].stop = true;
+
+ for (i = 0; i < NR_WORKERS; i++) {
+ ret = join_worker(&workers[i]);
+ if (ret && status == SCX_TEST_PASS) {
+ SCX_ERR("Failed to join worker thread %d (%d)", i, ret);
+ status = SCX_TEST_FAIL;
+ }
+ }
+
+ if (link)
+ bpf_link__destroy(link);
+
+ return status;
+}
+
+static void cleanup(void *ctx)
+{
+ struct cyclic_kick_wait *skel = ctx;
+
+ cyclic_kick_wait__destroy(skel);
+}
+
+struct scx_test cyclic_kick_wait = {
+ .name = "cyclic_kick_wait",
+ .description = "Verify SCX_KICK_WAIT forward progress under a 3-CPU wait cycle",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&cyclic_kick_wait)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
index 8d10042b489b..dbce6436ed26 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
@@ -22,5 +22,49 @@
"teardown": [
"$TC qdisc del dev $DUMMY root handle 1: htb default 1"
]
+ },
+ {
+ "id": "b7e3",
+ "name": "Empty fw filter on shared block - rejected at config time",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 egress_block 1 clsact"
+ ],
+ "cmdUnderTest": "$TC filter add block 1 protocol ip prio 1 fw",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show block 1",
+ "matchPattern": "fw",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 clsact"
+ ]
+ },
+ {
+ "id": "c8f4",
+ "name": "Flow filter on shared block without baseclass - rejected at config time",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress_block 1 clsact"
+ ],
+ "cmdUnderTest": "$TC filter add block 1 protocol ip prio 1 handle 1 flow map key dst",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show block 1",
+ "matchPattern": "flow",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 clsact"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 6a39640aa2a8..1e5efb2a31eb 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -1111,5 +1111,30 @@
"teardown": [
"$TC qdisc del dev $DUMMY root handle 1:"
]
+ },
+ {
+ "id": "a3d7",
+ "name": "HFSC with large m1 - no divide-by-zero on class reactivation",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc replace dev $DUMMY root handle 1: hfsc default 1",
+ "$TC class replace dev $DUMMY parent 1: classid 1:1 hfsc rt m1 32gbit d 1ms m2 0bit ls m1 32gbit d 1ms m2 0bit",
+ "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+ "sleep 1"
+ ],
+ "cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hfsc 1: root",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
}
]
diff --git a/tools/tracing/rtla/src/timerlat_bpf.h b/tools/tracing/rtla/src/timerlat_bpf.h
index 169abeaf4363..f7c5675737fe 100644
--- a/tools/tracing/rtla/src/timerlat_bpf.h
+++ b/tools/tracing/rtla/src/timerlat_bpf.h
@@ -12,7 +12,6 @@ enum summary_field {
};
#ifndef __bpf__
-#include <bpf/libbpf.h>
#ifdef HAVE_BPF_SKEL
int timerlat_bpf_init(struct timerlat_params *params);
int timerlat_bpf_attach(void);